Commit ca486f40 authored by Trond Myklebust's avatar Trond Myklebust

Merge bk://nfsclient.bkbits.net/linux-2.6

into fys.uio.no:/home/linux/bitkeeper/nfsclient-2.6
parents e8323593 9e84df77
......@@ -783,6 +783,54 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
security_d_instantiate(entry, inode);
}
/**
* d_instantiate_unique - instantiate a non-aliased dentry
* @entry: dentry to instantiate
* @inode: inode to attach to this dentry
*
* Fill in inode information in the entry. On success, it returns NULL.
* If an unhashed alias of "entry" already exists, then we return the
* aliased dentry instead.
*
* Note that in order to avoid conflicts with rename() etc, the caller
* had better be holding the parent directory semaphore.
*/
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *alias;
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
if (!inode)
goto do_negative;
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
if (qstr->hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (qstr->len != len)
continue;
if (memcmp(qstr->name, name, len))
continue;
dget_locked(alias);
spin_unlock(&dcache_lock);
BUG_ON(!d_unhashed(alias));
return alias;
}
list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
entry->d_inode = inode;
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
return NULL;
}
EXPORT_SYMBOL(d_instantiate_unique);
/**
* d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for
......
......@@ -1563,9 +1563,6 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0)
goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
......@@ -1708,9 +1705,6 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0)
goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
......
......@@ -40,8 +40,6 @@
static int nfs_opendir(struct inode *, struct file *);
static int nfs_readdir(struct file *, void *, filldir_t);
static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
static int nfs_cached_lookup(struct inode *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
static int nfs_mkdir(struct inode *, struct dentry *, int);
static int nfs_rmdir(struct inode *, struct dentry *);
......@@ -294,24 +292,13 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
return res;
}
static unsigned int nfs_type2dtype[] = {
DT_UNKNOWN,
DT_REG,
DT_DIR,
DT_BLK,
DT_CHR,
DT_LNK,
DT_SOCK,
DT_UNKNOWN,
DT_FIFO
};
static inline
unsigned int nfs_type_to_d_type(enum nfs_ftype type)
static inline unsigned int dt_type(struct inode *inode)
{
return nfs_type2dtype[type];
return (inode->i_mode >> 12) & 15;
}
static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
......@@ -321,6 +308,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
{
struct file *file = desc->file;
struct nfs_entry *entry = desc->entry;
struct dentry *dentry = NULL;
unsigned long fileid;
int loop_count = 0,
res;
......@@ -333,9 +321,16 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
* retrieving the current dirent on the server */
fileid = nfs_fileid_to_ino_t(entry->ino);
/* Get a dentry if we have one */
if (dentry != NULL)
dput(dentry);
dentry = nfs_readdir_lookup(desc);
/* Use readdirplus info */
if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR))
d_type = nfs_type_to_d_type(entry->fattr->type);
if (dentry != NULL && dentry->d_inode != NULL) {
d_type = dt_type(dentry->d_inode);
fileid = dentry->d_inode->i_ino;
}
res = filldir(dirent, entry->name, entry->len,
entry->prev_cookie, fileid, d_type);
......@@ -352,7 +347,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
}
}
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
return res;
}
......@@ -615,24 +611,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
goto out_valid;
}
/*
* Note: we're not holding inode->i_sem and so may be racing with
* operations that change the directory. We therefore save the
* change attribute *before* we do the RPC call.
*/
verifier = nfs_save_change_attribute(dir);
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
if (!error) {
if (nfs_compare_fh(NFS_FH(inode), &fhandle))
goto out_bad;
if (nfs_lookup_verify_inode(inode, isopen))
goto out_zap_parent;
goto out_valid_renew;
}
if (NFS_STALE(inode))
goto out_bad;
verifier = nfs_save_change_attribute(dir);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error)
goto out_bad;
......@@ -641,7 +623,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
goto out_bad;
out_valid_renew:
nfs_renew_times(dentry);
nfs_set_verifier(dentry, verifier);
out_valid:
......@@ -723,6 +704,7 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *res;
struct inode *inode = NULL;
int error;
struct nfs_fh fhandle;
......@@ -731,11 +713,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
error = -ENAMETOOLONG;
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
goto out;
error = -ENOMEM;
res = ERR_PTR(-ENOMEM);
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
lock_kernel();
......@@ -746,29 +728,27 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (nfs_is_exclusive_create(dir, nd))
goto no_entry;
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
if (error != 0) {
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name,
&fhandle, &fattr);
if (error == -ENOENT)
goto no_entry;
if (error != 0)
goto out_unlock;
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
res = ERR_PTR(error);
goto out_unlock;
}
error = -EACCES;
res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
if (!inode)
goto out_unlock;
no_entry:
error = 0;
d_add(dentry, inode);
res = d_add_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unlock:
unlock_kernel();
out:
BUG_ON(error > 0);
return ERR_PTR(error);
return res;
}
#ifdef CONFIG_NFS_V4
......@@ -798,15 +778,15 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct dentry *res = NULL;
struct inode *inode = NULL;
int error = 0;
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
error = -ENAMETOOLONG;
res = ERR_PTR(-ENAMETOOLONG);
goto out;
}
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
......@@ -828,7 +808,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
inode = nfs4_atomic_open(dir, dentry, nd);
unlock_kernel();
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
int error = PTR_ERR(inode);
switch (error) {
/* Make a negative dentry */
case -ENOENT:
......@@ -841,16 +821,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
/* case -EISDIR: */
/* case -EINVAL: */
default:
res = ERR_PTR(error);
goto out;
}
}
no_entry:
d_add(dentry, inode);
res = d_add_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out:
BUG_ON(error > 0);
return ERR_PTR(error);
return res;
no_open:
return nfs_lookup(dir, dentry, nd);
}
......@@ -906,83 +888,51 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
}
#endif /* CONFIG_NFS_V4 */
static inline
int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
{
struct dentry *parent = desc->file->f_dentry;
struct inode *dir = parent->d_inode;
struct nfs_entry *entry = desc->entry;
int status;
while((status = dir_decode(desc)) == 0) {
if (entry->len != dentry->d_name.len)
continue;
if (memcmp(entry->name, dentry->d_name.name, entry->len))
continue;
if (!(entry->fattr->valid & NFS_ATTR_FATTR))
continue;
break;
}
return status;
}
/*
* Use the cached Readdirplus results in order to avoid a LOOKUP call
* whenever we believe that the parent directory has not changed.
*
* We assume that any file creation/rename changes the directory mtime.
* As this results in a page cache invalidation whenever it occurs,
* we don't require any other tests for cache coherency.
*/
static
int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
nfs_readdir_descriptor_t desc;
struct nfs_server *server;
struct nfs_entry entry;
struct page *page;
unsigned long timestamp;
int res;
if (!NFS_USE_READDIRPLUS(dir))
return -ENOENT;
server = NFS_SERVER(dir);
/* Don't use readdirplus unless the cache is stable */
if ((server->flags & NFS_MOUNT_NOAC) != 0
|| nfs_caches_unstable(dir)
|| nfs_attribute_timeout(dir))
return -ENOENT;
if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
return -ENOENT;
timestamp = NFS_I(dir)->readdir_timestamp;
entry.fh = fh;
entry.fattr = fattr;
desc.decode = NFS_PROTO(dir)->decode_dirent;
desc.entry = &entry;
desc.page_index = 0;
desc.plus = 1;
for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
res = -EIO;
if (PageUptodate(page)) {
void * kaddr = kmap_atomic(page, KM_USER0);
desc.ptr = kaddr;
res = find_dirent_name(&desc, page, dentry);
kunmap_atomic(kaddr, KM_USER0);
}
page_cache_release(page);
struct dentry *dentry, *alias;
struct qstr name = {
.name = entry->name,
.len = entry->len,
};
struct inode *inode;
if (res == 0)
goto out_found;
if (res != -EAGAIN)
switch (name.len) {
case 2:
if (name.name[0] == '.' && name.name[1] == '.')
return dget_parent(parent);
break;
case 1:
if (name.name[0] == '.')
return dget(parent);
}
return -ENOENT;
out_found:
fattr->timestamp = timestamp;
return 0;
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
if (dentry != NULL)
return dentry;
if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
return NULL;
/* Note: caller is already holding the dir->i_sem! */
dentry = d_alloc(parent, &name);
if (dentry == NULL)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
if (!inode) {
dput(dentry);
return NULL;
}
alias = d_add_unique(dentry, inode);
if (alias != NULL) {
dput(dentry);
dentry = alias;
}
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return dentry;
}
/*
......@@ -1045,15 +995,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
if (nd && (nd->flags & LOOKUP_CREATE))
open_flags = nd->intent.open.flags;
/*
* The 0 argument passed into the create function should one day
* contain the O_EXCL flag if requested. This allows NFSv3 to
* select the appropriate create strategy. Currently open_namei
* does not pass the create flags.
*/
lock_kernel();
nfs_begin_data_update(dir);
inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
nfs_end_data_update(dir);
if (!IS_ERR(inode)) {
d_instantiate(dentry, inode);
......@@ -1438,7 +1382,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto go_ahead;
if (S_ISDIR(new_inode->i_mode))
goto out;
else if (atomic_read(&new_dentry->d_count) > 1) {
else if (atomic_read(&new_dentry->d_count) > 2) {
int err;
/* copy the target dentry's name */
dentry = d_alloc(new_dentry->d_parent,
......@@ -1453,10 +1397,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode = NULL;
/* instantiate the replacement target */
d_instantiate(new_dentry, NULL);
}
} else if (atomic_read(&new_dentry->d_count) > 1) {
/* dentry still busy? */
if (atomic_read(&new_dentry->d_count) > 1) {
#ifdef NFS_PARANOIA
printk("nfs_rename: target %s/%s busy, d_count=%d\n",
new_dentry->d_parent->d_name.name,
......@@ -1510,7 +1452,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
if (cache->cred != cred
|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR))
|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
return -ENOENT;
memcpy(res, cache, sizeof(*res));
return 0;
......@@ -1524,6 +1466,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
if (cache->cred)
put_rpccred(cache->cred);
cache->cred = get_rpccred(set->cred);
NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
}
cache->jiffies = set->jiffies;
cache->mask = set->mask;
......
......@@ -33,6 +33,7 @@
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
*
*/
......@@ -43,6 +44,7 @@
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
......@@ -50,11 +52,27 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#define NFSDBG_FACILITY NFSDBG_VFS
#define VERF_SIZE (2 * sizeof(__u32))
#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
static kmem_cache_t *nfs_direct_cachep;
/*
* This represents a set of asynchronous requests that we're waiting on
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
struct list_head list; /* nfs_read_data structs */
wait_queue_head_t wait; /* wait for i/o completion */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
atomic_t complete, /* i/os we're waiting for */
count, /* bytes actually processed */
error; /* any reported error */
};
/**
* nfs_get_user_pages - find and set up pages underlying user's buffer
......@@ -71,7 +89,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
unsigned long page_count;
size_t array_size;
/* set an arbitrary limit to prevent arithmetic overflow */
/* set an arbitrary limit to prevent type overflow */
/* XXX: this can probably be as large as INT_MAX */
if (size > MAX_DIRECTIO_SIZE)
return -EFBIG;
......@@ -93,6 +112,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
/**
* nfs_free_user_pages - tear down page struct array
* @pages: array of page struct pointers underlying target buffer
* @npages: number of pages in the array
* @do_dirty: dirty the pages as we release them
*/
static void
nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
......@@ -107,77 +128,231 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
}
/**
* nfs_direct_read_seg - Read in one iov segment. Generate separate
* read RPCs for each "rsize" bytes.
* @inode: target inode
* @ctx: target file open context
* user_addr: starting address of this segment of user's buffer
* count: size of this segment
* file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
* nfs_direct_req_release - release nfs_direct_req structure for direct read
* @kref: kref object embedded in an nfs_direct_req structure
*
*/
static int
nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset,
struct page **pages, int nr_pages)
static void nfs_direct_req_release(struct kref *kref)
{
const unsigned int rsize = NFS_SERVER(inode)->rsize;
int tot_bytes = 0;
int curpage = 0;
struct nfs_read_data rdata = {
.inode = inode,
.cred = ctx->cred,
.args = {
.fh = NFS_FH(inode),
.context = ctx,
},
.res = {
.fattr = &rdata.fattr,
},
};
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
kmem_cache_free(nfs_direct_cachep, dreq);
}
/**
* nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
* @nbytes: count of bytes for the read request
* @rsize: local rsize setting
*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
unsigned int reads = 0;
dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
init_waitqueue_head(&dreq->wait);
INIT_LIST_HEAD(&dreq->list);
atomic_set(&dreq->count, 0);
atomic_set(&dreq->error, 0);
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc();
if (unlikely(!data)) {
while (!list_empty(list)) {
data = list_entry(list->next,
struct nfs_read_data, pages);
list_del(&data->pages);
nfs_readdata_free(data);
}
kref_put(&dreq->kref, nfs_direct_req_release);
return NULL;
}
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, list);
rdata.args.pgbase = user_addr & ~PAGE_MASK;
rdata.args.offset = file_offset;
do {
int result;
data->req = (struct nfs_page *) dreq;
reads++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
atomic_set(&dreq->complete, reads);
return dreq;
}
/**
* nfs_direct_read_result - handle a read reply for a direct read request
* @data: address of NFS READ operation control block
* @status: status of this NFS READ operation
*
* We must hold a reference to all the pages in this direct read request
* until the RPCs complete. This could be long *after* we are woken up in
* nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
*/
static void nfs_direct_read_result(struct nfs_read_data *data, int status)
{
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
rdata.args.count = count;
if (rdata.args.count > rsize)
rdata.args.count = rsize;
rdata.args.pages = &pages[curpage];
if (likely(status >= 0))
atomic_add(data->res.count, &dreq->count);
else
atomic_set(&dreq->error, status);
dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
rdata.args.count, (long long) rdata.args.offset,
user_addr + tot_bytes, rdata.args.pgbase, curpage);
if (unlikely(atomic_dec_and_test(&dreq->complete))) {
nfs_free_user_pages(dreq->pages, dreq->npages, 1);
wake_up(&dreq->wait);
kref_put(&dreq->kref, nfs_direct_req_release);
}
}
/**
* nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
* @dreq: address of nfs_direct_req struct for this request
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset)
{
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
unsigned int curpage, pgbase;
unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
pgbase = user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
unsigned int bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->inode = inode;
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = file_offset;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
data->task.tk_calldata = data;
data->task.tk_release = nfs_readdata_release;
data->complete = nfs_direct_read_result;
lock_kernel();
result = NFS_PROTO(inode)->read(&rdata);
rpc_execute(&data->task);
unlock_kernel();
if (result <= 0) {
if (tot_bytes > 0)
break;
if (result == -EISDIR)
result = -EINVAL;
return result;
}
dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
tot_bytes += result;
if (rdata.res.eof)
break;
file_offset += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
rdata.args.offset += result;
rdata.args.pgbase += result;
curpage += rdata.args.pgbase >> PAGE_SHIFT;
rdata.args.pgbase &= ~PAGE_MASK;
count -= result;
count -= bytes;
} while (count != 0);
}
/**
* nfs_direct_read_wait - wait for I/O completion for direct reads
* @dreq: request on which we are to wait
* @intr: whether or not this wait can be interrupted
*
* Collects and returns the final error value/byte-count.
*/
static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
{
int result = 0;
if (intr) {
result = wait_event_interruptible(dreq->wait,
(atomic_read(&dreq->complete) == 0));
} else {
wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
}
/* XXX: should we zero the rest of the user's buffer if we
* hit eof? */
if (!result)
result = atomic_read(&dreq->error);
if (!result)
result = atomic_read(&dreq->count);
return tot_bytes;
kref_put(&dreq->kref, nfs_direct_req_release);
return (ssize_t) result;
}
/**
 * nfs_direct_read_seg - read one iov segment using asynchronous READs
 * @inode: target inode
 * @ctx: target file open context
 * @user_addr: starting address of this segment of user's buffer
 * @count: size of this segment
 * @file_offset: offset in file to begin the operation
 * @pages: array of addresses of page structs defining user's buffer
 * @nr_pages: number of pages in the array
 *
 * Allocates a request sized for @count bytes at the server's rsize,
 * records the page array in it, dispatches the reads and waits for them
 * with the RPC client's signal mask applied.
 *
 * Returns the value produced by nfs_direct_read_wait(), or -ENOMEM when
 * the request could not be allocated.
 */
static ssize_t nfs_direct_read_seg(struct inode *inode,
		struct nfs_open_context *ctx, unsigned long user_addr,
		size_t count, loff_t file_offset, struct page **pages,
		unsigned int nr_pages)
{
	struct rpc_clnt *client = NFS_CLIENT(inode);
	struct nfs_direct_req *req;
	sigset_t saved_sigs;
	ssize_t status;

	req = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
	if (req == NULL)
		return -ENOMEM;

	/* The completion path reads these from the request. */
	req->pages = pages;
	req->npages = nr_pages;

	rpc_clnt_sigmask(client, &saved_sigs);
	nfs_direct_read_schedule(req, inode, ctx, user_addr, count,
				 file_offset);
	status = nfs_direct_read_wait(req, client->cl_intr);
	rpc_clnt_sigunmask(client, &saved_sigs);

	return status;
}
/**
......@@ -189,9 +364,8 @@ nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
* file_offset: offset in file to begin the operation
* nr_segs: size of iovec array
*
* generic_file_direct_IO has already pushed out any non-direct
* writes so that this read will see them when we read from the
* server.
* We've already pushed out any non-direct writes so that this read
* will see them when we read from the server.
*/
static ssize_t
nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
......@@ -220,8 +394,6 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
result = nfs_direct_read_seg(inode, ctx, user_addr, size,
file_offset, pages, page_count);
nfs_free_user_pages(pages, page_count, 1);
if (result <= 0) {
if (tot_bytes > 0)
break;
......@@ -247,31 +419,31 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
*/
static int
nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset,
struct page **pages, int nr_pages)
static ssize_t nfs_direct_write_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
int nr_pages)
{
const unsigned int wsize = NFS_SERVER(inode)->wsize;
size_t request;
int curpage, need_commit, result, tot_bytes;
int curpage, need_commit;
ssize_t result, tot_bytes;
struct nfs_writeverf first_verf;
struct nfs_write_data wdata = {
.inode = inode,
.cred = ctx->cred,
.args = {
.fh = NFS_FH(inode),
.context = ctx,
},
.res = {
.fattr = &wdata.fattr,
.verf = &wdata.verf,
},
};
struct nfs_write_data *wdata;
wdata.args.stable = NFS_UNSTABLE;
wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
wdata->inode = inode;
wdata->cred = ctx->cred;
wdata->args.fh = NFS_FH(inode);
wdata->args.context = ctx;
wdata->args.stable = NFS_UNSTABLE;
if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
wdata.args.stable = NFS_FILE_SYNC;
wdata->args.stable = NFS_FILE_SYNC;
wdata->res.fattr = &wdata->fattr;
wdata->res.verf = &wdata->verf;
nfs_begin_data_update(inode);
retry:
......@@ -279,20 +451,20 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
tot_bytes = 0;
curpage = 0;
request = count;
wdata.args.pgbase = user_addr & ~PAGE_MASK;
wdata.args.offset = file_offset;
do {
wdata.args.count = request;
if (wdata.args.count > wsize)
wdata.args.count = wsize;
wdata.args.pages = &pages[curpage];
wdata->args.pgbase = user_addr & ~PAGE_MASK;
wdata->args.offset = file_offset;
do {
wdata->args.count = request;
if (wdata->args.count > wsize)
wdata->args.count = wsize;
wdata->args.pages = &pages[curpage];
dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
wdata.args.count, (long long) wdata.args.offset,
user_addr + tot_bytes, wdata.args.pgbase, curpage);
wdata->args.count, (long long) wdata->args.offset,
user_addr + tot_bytes, wdata->args.pgbase, curpage);
lock_kernel();
result = NFS_PROTO(inode)->write(&wdata);
result = NFS_PROTO(inode)->write(wdata);
unlock_kernel();
if (result <= 0) {
......@@ -302,20 +474,25 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
}
if (tot_bytes == 0)
memcpy(&first_verf.verifier, &wdata.verf.verifier,
VERF_SIZE);
if (wdata.verf.committed != NFS_FILE_SYNC) {
memcpy(&first_verf.verifier, &wdata->verf.verifier,
sizeof(first_verf.verifier));
if (wdata->verf.committed != NFS_FILE_SYNC) {
need_commit = 1;
if (memcmp(&first_verf.verifier,
&wdata.verf.verifier, VERF_SIZE))
/*
 * Retry synchronously only when the write verifier differs from the
 * one saved from the first reply.  A stray ';' after this condition
 * would make the goto unconditional.
 */
if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
	   sizeof(first_verf.verifier)) != 0)
	goto sync_retry;
}
tot_bytes += result;
wdata.args.offset += result;
wdata.args.pgbase += result;
curpage += wdata.args.pgbase >> PAGE_SHIFT;
wdata.args.pgbase &= ~PAGE_MASK;
tot_bytes += result;
/* in case of a short write: stop now, let the app recover */
if (result < wdata->args.count)
break;
wdata->args.offset += result;
wdata->args.pgbase += result;
curpage += wdata->args.pgbase >> PAGE_SHIFT;
wdata->args.pgbase &= ~PAGE_MASK;
request -= result;
} while (request != 0);
......@@ -323,27 +500,27 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* Commit data written so far, even in the event of an error
*/
if (need_commit) {
wdata.args.count = tot_bytes;
wdata.args.offset = file_offset;
wdata->args.count = tot_bytes;
wdata->args.offset = file_offset;
lock_kernel();
result = NFS_PROTO(inode)->commit(&wdata);
result = NFS_PROTO(inode)->commit(wdata);
unlock_kernel();
if (result < 0 || memcmp(&first_verf.verifier,
&wdata.verf.verifier,
VERF_SIZE) != 0)
&wdata->verf.verifier,
sizeof(first_verf.verifier)) != 0)
goto sync_retry;
}
result = tot_bytes;
out:
nfs_end_data_update_defer(inode);
nfs_writedata_free(wdata);
return result;
sync_retry:
wdata.args.stable = NFS_FILE_SYNC;
wdata->args.stable = NFS_FILE_SYNC;
goto retry;
}
......@@ -360,9 +537,9 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* that non-direct readers might access, so they will pick up these
* writes immediately.
*/
static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx,
const struct iovec *iov, loff_t file_offset,
unsigned long nr_segs)
static ssize_t nfs_direct_write(struct inode *inode,
struct nfs_open_context *ctx, const struct iovec *iov,
loff_t file_offset, unsigned long nr_segs)
{
ssize_t tot_bytes = 0;
unsigned long seg = 0;
......@@ -501,6 +678,8 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
......@@ -590,6 +769,8 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
......@@ -605,3 +786,21 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
out:
return retval;
}
/*
 * Create the slab cache that nfs_direct_req structures are allocated
 * from.  Returns 0 on success, or -ENOMEM if the cache could not be
 * created.
 */
int nfs_init_directcache(void)
{
	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
					      sizeof(struct nfs_direct_req),
					      0, SLAB_RECLAIM_ACCOUNT,
					      NULL, NULL);

	return nfs_direct_cachep ? 0 : -ENOMEM;
}
/*
 * Destroy the nfs_direct_req slab cache, logging a notice when
 * kmem_cache_destroy() reports a non-zero result.
 */
void nfs_destroy_directcache(void)
{
	int leftover = kmem_cache_destroy(nfs_direct_cachep);

	if (leftover != 0)
		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
}
......@@ -295,10 +295,19 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int status;
int status = 0;
lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else {
	struct file_lock *cfl = posix_test_lock(filp, fl);

	/*
	 * The caller (fcntl_getlk) treats fl_type == F_UNLCK as "no
	 * conflicting lock" and any other type as a description of the
	 * conflict.  So: copy the conflicting lock when there is one,
	 * and report F_UNLCK only when there is none.
	 */
	if (cfl != NULL)
		memcpy(fl, cfl, sizeof(*fl));
	else
		fl->fl_type = F_UNLCK;
}
unlock_kernel();
return status;
}
......@@ -325,7 +334,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
* still need to complete the unlock.
*/
lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = posix_lock_file_wait(filp, fl);
rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
......@@ -351,15 +364,19 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
return status;
lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* If we were signalled we still need to ensure that
* we clean up any state on the server. We therefore
* record the lock call as having succeeded in order to
* ensure that locks_remove_posix() cleans it out when
* the process exits.
*/
if (status == -EINTR || status == -ERESTARTSYS)
posix_lock_file(filp, fl);
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* If we were signalled we still need to ensure that
* we clean up any state on the server. We therefore
* record the lock call as having succeeded in order to
* ensure that locks_remove_posix() cleans it out when
* the process exits.
*/
if (status == -EINTR || status == -ERESTARTSYS)
posix_lock_file(filp, fl);
} else
status = posix_lock_file_wait(filp, fl);
unlock_kernel();
if (status < 0)
return status;
......@@ -396,15 +413,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (NFS_PROTO(inode)->version != 4) {
/* Fake OK code if mounted without NLM support */
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
if (IS_GETLK(cmd))
return LOCK_USE_CLNT;
return 0;
}
}
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
......
......@@ -486,13 +486,27 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
if (error < 0)
goto out_err;
buf->f_frsize = server->wtmult;
/*
* Current versions of glibc do not correctly handle the
* case where f_frsize != f_bsize. Eventually we want to
* report the value of wtmult in this field.
*/
buf->f_frsize = sb->s_blocksize;
/*
* On most *nix systems, f_blocks, f_bfree, and f_bavail
* are reported in units of f_frsize. Linux hasn't had
* an f_frsize field in its statfs struct until recently,
* thus historically Linux's sys_statfs reports these
* fields in units of f_bsize.
*/
buf->f_bsize = sb->s_blocksize;
blockbits = sb->s_blocksize_bits;
blockres = (1 << blockbits) - 1;
buf->f_blocks = (res.tbytes + blockres) >> blockbits;
buf->f_bfree = (res.fbytes + blockres) >> blockbits;
buf->f_bavail = (res.abytes + blockres) >> blockbits;
buf->f_files = res.tfiles;
buf->f_ffree = res.afiles;
......@@ -565,9 +579,9 @@ nfs_zap_caches(struct inode *inode)
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
else
nfsi->flags |= NFS_INO_INVALID_ATTR;
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
}
/*
......@@ -605,7 +619,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
return 0;
if (nfs_compare_fh(NFS_FH(inode), fh))
return 0;
if (is_bad_inode(inode))
if (is_bad_inode(inode) || NFS_STALE(inode))
return 0;
return 1;
}
......@@ -766,13 +780,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
vmtruncate(inode, attr->ia_size);
}
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
nfs_end_data_update(inode);
unlock_kernel();
return error;
......@@ -949,14 +958,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
lock_kernel();
if (!inode || is_bad_inode(inode))
goto out_nowait;
if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
if (NFS_STALE(inode))
goto out_nowait;
while (NFS_REVALIDATING(inode)) {
status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
if (status < 0)
goto out_nowait;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
if (NFS_ATTRTIMEO(inode) == 0)
continue;
if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
continue;
......@@ -968,14 +977,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
/* Protect against RPC races by saving the change attribute */
verifier = nfs_save_change_attribute(inode);
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
if (status) {
if (status != 0) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode), status);
if (status == -ESTALE) {
NFS_FLAGS(inode) |= NFS_INO_STALE;
if (inode != inode->i_sb->s_root->d_inode)
remove_inode_hash(inode);
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
NFS_FLAGS(inode) |= NFS_INO_STALE;
}
goto out;
}
......@@ -1014,7 +1023,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
inode->i_sb->s_id,
(long long)NFS_FILEID(inode));
NFS_FLAGS(inode) &= ~NFS_INO_STALE;
out:
NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
wake_up(&nfsi->nfs_i_wait);
......@@ -1161,7 +1169,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|| inode->i_uid != fattr->uid
|| inode->i_gid != fattr->gid)
nfsi->flags |= NFS_INO_INVALID_ATTR;
nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
/* Has the link count changed? */
if (inode->i_nlink != fattr->nlink)
......@@ -1270,7 +1278,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
#endif
nfsi->change_attr = fattr->change_attr;
if (!data_unstable)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
}
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
......@@ -1278,14 +1286,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
inode->i_uid != fattr->uid ||
inode->i_gid != fattr->gid) {
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
invalid |= NFS_INO_INVALID_ATTR;
}
inode->i_gid != fattr->gid)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
inode->i_mode = fattr->mode;
inode->i_nlink = fattr->nlink;
......@@ -1335,7 +1337,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
*/
nfs_invalidate_inode(inode);
out_err:
return -EIO;
NFS_FLAGS(inode) |= NFS_INO_STALE;
return -ESTALE;
}
/*
......@@ -1449,8 +1452,6 @@ static void nfs_kill_super(struct super_block *s)
kill_anon_super(s);
nfs4_renewd_prepare_shutdown(server);
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
if (server->client_sys != NULL && !IS_ERR(server->client_sys))
......@@ -1461,8 +1462,6 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
if (server->hostname != NULL)
kfree(server->hostname);
kfree(server);
......@@ -1543,9 +1542,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
server->wsize = nfs_block_size(data->wsize, NULL);
server->flags = data->flags & NFS_MOUNT_FLAGMASK;
/* NFSv4 doesn't use NLM locking */
server->flags |= NFS_MOUNT_NONLM;
server->acregmin = data->acregmin*HZ;
server->acregmax = data->acregmax*HZ;
server->acdirmin = data->acdirmin*HZ;
......@@ -1790,8 +1786,22 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
/*
 * Tear down an NFSv4 superblock.
 *
 * Delegations are returned first, then the anonymous superblock is
 * killed, the lease renewal daemon is prepared for shutdown, the RPC
 * client is shut down, the rpciod reference is dropped, the NFSv4 state
 * is destroyed and finally the nfs_server structure is freed.
 *
 * The teardown is performed inline here; nfs_kill_super() must not also
 * be called, since it shuts down and frees the same nfs_server and the
 * steps below would then operate on freed memory.
 */
static void nfs4_kill_super(struct super_block *sb)
{
	struct nfs_server *server = NFS_SB(sb);

	nfs_return_all_delegations(sb);
	kill_anon_super(sb);

	nfs4_renewd_prepare_shutdown(server);

	if (server->client != NULL && !IS_ERR(server->client))
		rpc_shutdown_client(server->client);
	rpciod_down();		/* release rpciod */

	destroy_nfsv4_state(server);

	if (server->hostname != NULL)
		kfree(server->hostname);
	kfree(server);
}
static struct file_system_type nfs4_fs_type = {
......@@ -1821,9 +1831,13 @@ static struct file_system_type nfs4_fs_type = {
extern int nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
extern int nfs_init_readpagecache(void);
extern int nfs_destroy_readpagecache(void);
extern void nfs_destroy_readpagecache(void);
extern int nfs_init_writepagecache(void);
extern int nfs_destroy_writepagecache(void);
extern void nfs_destroy_writepagecache(void);
#ifdef CONFIG_NFS_DIRECTIO
extern int nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
#endif
static kmem_cache_t * nfs_inode_cachep;
......@@ -1904,6 +1918,12 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
#ifdef CONFIG_NFS_DIRECTIO
err = nfs_init_directcache();
if (err)
goto out0;
#endif
#ifdef CONFIG_PROC_FS
rpc_proc_register(&nfs_rpcstat);
#endif
......@@ -1914,8 +1934,14 @@ static int __init init_nfs_fs(void)
goto out;
return 0;
out:
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
nfs_destroy_writepagecache();
#ifdef CONFIG_NFS_DIRECTIO
out0:
nfs_destroy_directcache();
#endif
out1:
nfs_destroy_readpagecache();
out2:
......@@ -1928,6 +1954,9 @@ static int __init init_nfs_fs(void)
static void __exit exit_nfs_fs(void)
{
#ifdef CONFIG_NFS_DIRECTIO
nfs_destroy_directcache();
#endif
nfs_destroy_writepagecache();
nfs_destroy_readpagecache();
nfs_destroy_inodecache();
......
......@@ -80,10 +80,10 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call fsinfo\n", __FUNCTION__);
info->fattr->valid = 0;
status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
}
......@@ -101,7 +101,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr\n");
dprintk("NFS reply getattr: %d\n", status);
return status;
}
......@@ -119,7 +119,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n");
dprintk("NFS reply setattr: %d\n", status);
return status;
}
......@@ -198,7 +198,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC;
}
dprintk("NFS reply access, status = %d\n", status);
dprintk("NFS reply access: %d\n", status);
return status;
}
......@@ -296,7 +296,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
* For now, we don't implement O_EXCL.
*/
static struct inode *
nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
......@@ -304,8 +304,8 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct nfs_fattr dir_attr;
struct nfs3_createargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len,
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr,
};
struct nfs3_diropres res = {
......@@ -315,7 +315,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
};
int status;
dprintk("NFS call create %s\n", name->name);
dprintk("NFS call create %s\n", dentry->d_name.name);
arg.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) {
arg.createmode = NFS3_CREATE_EXCLUSIVE;
......@@ -353,7 +353,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
if (status != 0)
goto out;
if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
status = nfs3_proc_lookup(dir, name, &fhandle, &fattr);
status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (status != 0)
goto out;
}
......
......@@ -477,7 +477,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st
/*
* Returns an nfs4_state + an referenced inode
*/
static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
......@@ -491,7 +491,7 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
struct nfs_openargs o_arg = {
.fh = NFS_FH(dir),
.open_flags = flags,
.name = name,
.name = &dentry->d_name,
.server = server,
.bitmask = server->attr_bitmask,
.claim = NFS4_OPEN_CLAIM_NULL,
......@@ -581,14 +581,14 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
}
struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
status = _nfs4_do_open(dir, name, flags, sattr, cred, &res);
status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
......@@ -635,6 +635,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
fattr->valid = 0;
if (state != NULL)
msg.rpc_cred = state->owner->so_cred;
if (sattr->ia_valid & ATTR_SIZE)
nfs4_copy_stateid(&arg.stateid, state, NULL);
else
......@@ -658,6 +660,61 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return err;
}
/*
 * Per-call context for an asynchronous CLOSE / OPEN_DOWNGRADE request.
 * Allocated by nfs4_do_close() and freed by nfs4_close_done().
 */
struct nfs4_closedata {
/* inode being closed; used to look up the nfs_server and file handle */
struct inode *inode;
/* open state whose mode and stateid are updated on completion */
struct nfs4_state *state;
/* RPC arguments (fh, seqid, open_flags, stateid) */
struct nfs_closeargs arg;
/* RPC results (the stateid is copied back into the state on success) */
struct nfs_closeres res;
};
/*
 * Completion callback for the asynchronous CLOSE / OPEN_DOWNGRADE call
 * issued by nfs4_close_call().
 *
 * Updates the open state from the reply, then releases the open state,
 * the state owner and its semaphore, and the client's cl_sem read lock,
 * and frees the call data.  If the error handler asks for a retry
 * (-EAGAIN) the RPC is restarted and nothing is released yet.
 */
static void nfs4_close_done(struct rpc_task *task)
{
struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
struct nfs4_state *state = calldata->state;
struct nfs4_state_owner *sp = state->owner;
struct nfs_server *server = NFS_SERVER(calldata->inode);
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
nfs4_increment_seqid(task->tk_status, sp);
switch (task->tk_status) {
case 0:
/* Success: record the new open mode and the stateid from the reply. */
state->state = calldata->arg.open_flags;
memcpy(&state->stateid, &calldata->res.stateid,
sizeof(state->stateid));
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Record the requested mode anyway and schedule state recovery. */
state->state = calldata->arg.open_flags;
nfs4_schedule_state_recovery(server->nfs4_state);
break;
default:
/* Retry path: return early and keep every reference and lock held. */
if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
return;
}
}
/*
 * Final cleanup.  NOTE(review): these references and locks are presumably
 * the ones taken in nfs4_close_state() before it returned on -EINPROGRESS
 * -- confirm against the caller.
 */
nfs4_put_open_state(state);
up(&sp->so_sema);
nfs4_put_state_owner(sp);
up_read(&server->nfs4_state->cl_sem);
kfree(calldata);
}
/*
 * Launch the asynchronous RPC described by @calldata on @clnt.
 *
 * A zero open_flags selects the CLOSE procedure; any other value
 * selects OPEN_DOWNGRADE.  nfs4_close_done() is registered as the
 * completion callback.  Returns the result of rpc_call_async().
 */
static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
{
	int proc = (calldata->arg.open_flags != 0) ?
			NFSPROC4_CLNT_OPEN_DOWNGRADE : NFSPROC4_CLNT_CLOSE;
	struct rpc_message msg = {
		.rpc_proc = &nfs4_procedures[proc],
		.rpc_argp = &calldata->arg,
		.rpc_resp = &calldata->res,
		.rpc_cred = calldata->state->owner->so_cred,
	};

	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
}
/*
* It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush).
......@@ -669,102 +726,34 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
};
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
struct nfs4_closedata *calldata;
int status;
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
/* Tell caller we're done */
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
state->state = mode;
return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
}
calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
if (calldata == NULL)
return -ENOMEM;
calldata->inode = inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(inode);
/* Serialization for the sequence id */
arg.seqid = sp->so_seqid,
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
calldata->arg.seqid = state->owner->so_seqid;
calldata->arg.open_flags = mode;
memcpy(&calldata->arg.stateid, &state->stateid,
sizeof(calldata->arg.stateid));
status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
/*
* Return -EINPROGRESS on success in order to indicate to the
* caller that an asynchronous RPC call has been launched, and
* that it will release the semaphores on completion.
*/
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_close(inode, state);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
default:
state->state = 0;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
.seqid = sp->so_seqid,
.open_flags = mode,
};
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_downgrade(inode, state, mode);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
default:
state->state = mode;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
return (status == 0) ? -EINPROGRESS : status;
}
struct inode *
......@@ -785,7 +774,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
}
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state))
return (struct inode *)state;
......@@ -802,7 +791,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
if (IS_ERR(state))
state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
put_rpccred(cred);
if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
return 1;
......@@ -1026,7 +1015,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
FMODE_WRITE, cred);
if (IS_ERR(state))
state = nfs4_do_open(dentry->d_parent->d_inode,
&dentry->d_name, FMODE_WRITE,
dentry, FMODE_WRITE,
NULL, cred);
need_iput = 1;
}
......@@ -1327,7 +1316,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
*/
static struct inode *
nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct inode *inode;
......@@ -1335,7 +1324,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct rpc_cred *cred;
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, name, flags, sattr, cred);
state = nfs4_do_open(dir, dentry, flags, sattr, cred);
put_rpccred(cred);
if (!IS_ERR(state)) {
inode = state->inode;
......
......@@ -445,7 +445,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
state->inode = inode;
state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
} else {
spin_unlock(&inode->i_lock);
......@@ -471,6 +471,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
list_del(&state->inode_states);
spin_unlock(&inode->i_lock);
list_del(&state->open_states);
iput(inode);
BUG_ON (state->state != 0);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
......@@ -486,7 +487,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
struct nfs4_state_owner *owner = state->owner;
struct nfs4_client *clp = owner->so_client;
int newstate;
int status = 0;
atomic_inc(&owner->so_count);
down_read(&clp->cl_sem);
......@@ -508,10 +508,8 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
newstate |= FMODE_WRITE;
if (state->state == newstate)
goto out;
if (newstate != 0)
status = nfs4_do_downgrade(inode, state, newstate);
else
status = nfs4_do_close(inode, state);
if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
return;
}
out:
nfs4_put_open_state(state);
......
......@@ -63,12 +63,12 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call getattr\n", __FUNCTION__);
fattr->valid = 0;
status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
info->rtmax = NFS_MAXDATA;
......@@ -96,7 +96,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr\n");
dprintk("NFS reply getattr: %d\n", status);
return status;
}
......@@ -114,7 +114,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n");
dprintk("NFS reply setattr: %d\n", status);
return status;
}
......@@ -213,15 +213,15 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
}
static struct inode *
nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_createargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len,
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs_diropok res = {
......@@ -231,7 +231,7 @@ nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
int status;
fattr.valid = 0;
dprintk("NFS call create %s\n", name->name);
dprintk("NFS call create %s\n", dentry->d_name.name);
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
dprintk("NFS reply create: %d\n", status);
if (status == 0) {
......
......@@ -24,7 +24,6 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
......@@ -39,25 +38,11 @@ static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);
static kmem_cache_t *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;
mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
static struct nfs_read_data *nfs_readdata_alloc(void)
{
struct nfs_read_data *p;
p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
static void nfs_readdata_release(struct rpc_task *task)
void nfs_readdata_release(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
nfs_readdata_free(data);
......
......@@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentry)
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
if (data->task.tk_rpcwait == &nfs_delete_queue)
rpc_wake_up_task(&data->task);
rpc_wake_up_task(&data->task);
nfs_put_unlinkdata(data);
}
......@@ -61,7 +61,6 @@
#include <linux/nfs_page.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#include <linux/mempool.h>
#include "delegation.h"
......@@ -83,49 +82,17 @@ static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static kmem_cache_t *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
mempool_t *nfs_wdata_mempool;
mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
static void nfs_writedata_release(struct rpc_task *task)
void nfs_writedata_release(struct rpc_task *task)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
nfs_writedata_free(wdata);
}
static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
......@@ -184,11 +151,10 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result, written = 0;
struct nfs_write_data *wdata;
wdata = kmalloc(sizeof(*wdata), GFP_NOFS);
wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
memset(wdata, 0, sizeof(*wdata));
wdata->flags = how;
wdata->cred = ctx->cred;
wdata->inode = inode;
......@@ -238,8 +204,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
io_error:
nfs_end_data_update_defer(inode);
kfree(wdata);
nfs_writedata_free(wdata);
return written ? written : result;
}
......@@ -1199,7 +1164,8 @@ void nfs_writeback_done(struct rpc_task *task)
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
"NFS: Server wrote less than requested.\n");
"NFS: Server wrote zero bytes, expected %u.\n",
argp->count);
complain = jiffies + 300 * HZ;
}
/* Can't do anything about it except throw an error. */
......
......@@ -199,6 +199,7 @@ static inline int dname_external(struct dentry *dentry)
* These are the low-level FS interfaces to the dcache..
*/
extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern void d_delete(struct dentry *);
/* allocate/de-allocate */
......@@ -242,6 +243,23 @@ static inline void d_add(struct dentry *entry, struct inode *inode)
d_rehash(entry);
}
/**
 * d_add_unique - add dentry to hash queues without aliasing
 * @entry: dentry to add
 * @inode: The inode to attach to this dentry
 *
 * Attaches @inode to @entry through d_instantiate_unique() and hashes
 * the resulting dentry.  If d_instantiate_unique() hands back an
 * existing alias, that alias is hashed instead of @entry.
 *
 * Returns NULL when @entry itself was instantiated and hashed, otherwise
 * the alias returned by d_instantiate_unique().
 */
static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *alias = d_instantiate_unique(entry, inode);

	if (alias != NULL)
		d_rehash(alias);
	else
		d_rehash(entry);
	return alias;
}
/* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *);
......
......@@ -1189,11 +1189,6 @@ extern long do_mount(char *, char *, char *, unsigned long, void *);
extern int vfs_statfs(struct super_block *, struct kstatfs *);
/* Return value for VFS lock functions - tells locks.c to lock conventionally
* REALLY kosha for root NFS and nfs_lock
*/
#define LOCK_USE_CLNT 1
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
......
......@@ -30,6 +30,7 @@
#include <linux/nfs_xdr.h>
#include <linux/rwsem.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
/*
* Enable debugging support for nfs client.
......@@ -201,6 +202,7 @@ struct nfs_inode {
#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */
static inline struct nfs_inode *NFS_I(struct inode *inode)
{
......@@ -239,7 +241,7 @@ static inline int nfs_caches_unstable(struct inode *inode)
static inline void NFS_CACHEINV(struct inode *inode)
{
if (!nfs_caches_unstable(inode))
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
}
static inline int nfs_server_capable(struct inode *inode, int cap)
......@@ -424,6 +426,44 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
return nfs_wb_page_priority(inode, page, 0);
}
/*
* Allocate and free nfs_write_data structures
*/
extern mempool_t *nfs_wdata_mempool;
extern mempool_t *nfs_commit_mempool;
/*
 * Allocate a struct nfs_write_data from nfs_wdata_mempool (SLAB_NOFS).
 * On success the structure is zeroed and its page list initialised.
 * Returns NULL if the allocation fails.
 */
static inline struct nfs_write_data *nfs_writedata_alloc(void)
{
	struct nfs_write_data *wdata;

	wdata = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
	if (wdata == NULL)
		return NULL;

	memset(wdata, 0, sizeof(*wdata));
	INIT_LIST_HEAD(&wdata->pages);
	return wdata;
}
/* Return a struct nfs_write_data to nfs_wdata_mempool. */
static inline void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
extern void nfs_writedata_release(struct rpc_task *task);
/*
 * Allocate a struct nfs_write_data from nfs_commit_mempool (SLAB_NOFS).
 * On success the structure is zeroed and its page list initialised.
 * Returns NULL if the allocation fails.
 */
static inline struct nfs_write_data *nfs_commit_alloc(void)
{
	struct nfs_write_data *cdata;

	cdata = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
	if (cdata == NULL)
		return NULL;

	memset(cdata, 0, sizeof(*cdata));
	INIT_LIST_HEAD(&cdata->pages);
	return cdata;
}
/*
 * Hand an nfs_write_data back to nfs_commit_mempool, the pool that
 * nfs_commit_alloc() draws from.
 */
static inline void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
......@@ -438,6 +478,26 @@ extern int nfs_readpages(struct file *, struct address_space *,
extern int nfs_pagein_list(struct list_head *, int);
extern void nfs_readpage_result(struct rpc_task *);
/*
* Allocate and free nfs_read_data structures
*/
extern mempool_t *nfs_rdata_mempool;
/*
 * Obtain an nfs_read_data from nfs_rdata_mempool using SLAB_NOFS.
 * Returns NULL when mempool_alloc() yields nothing; otherwise the
 * structure is returned zero-filled.
 */
static inline struct nfs_read_data *nfs_readdata_alloc(void)
{
	struct nfs_read_data *rdata;

	rdata = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
	if (rdata == NULL)
		return NULL;

	memset(rdata, 0, sizeof(*rdata));
	return rdata;
}
/*
 * Hand an nfs_read_data back to nfs_rdata_mempool, the pool that
 * nfs_readdata_alloc() draws from.
 */
static inline void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
extern void nfs_readdata_release(struct rpc_task *task);
/*
* linux/fs/mount_clnt.c
* (Used only by nfsroot module)
......@@ -651,8 +711,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
......
......@@ -681,7 +681,7 @@ struct nfs_rpc_ops {
int (*read) (struct nfs_read_data *);
int (*write) (struct nfs_write_data *);
int (*commit) (struct nfs_write_data *);
struct inode * (*create) (struct inode *, struct qstr *,
struct inode * (*create) (struct inode *, struct dentry *,
struct iattr *, int);
int (*remove) (struct inode *, struct qstr *);
int (*unlink_setup) (struct rpc_message *,
......
......@@ -51,7 +51,6 @@ struct rpc_cred {
};
#define RPCAUTH_CRED_LOCKED 0x0001
#define RPCAUTH_CRED_UPTODATE 0x0002
#define RPCAUTH_CRED_DEAD 0x0004
#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
......@@ -131,7 +130,6 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *);
int rpcauth_deadcred(struct rpc_task *);
void rpcauth_init_credcache(struct rpc_auth *);
void rpcauth_free_credcache(struct rpc_auth *);
......
......@@ -11,7 +11,9 @@
#include <linux/timer.h>
#include <linux/sunrpc/types.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/xdr.h>
/*
......@@ -25,11 +27,18 @@ struct rpc_message {
struct rpc_cred * rpc_cred; /* Credentials */
};
struct rpc_wait_queue;
/*
 * Wait-queue state for an RPC task.  struct rpc_task embeds this as
 * u.tk_wait, in a union with the work_struct u.tk_work.
 * The task_for_each()/task_for_first() walkers locate the owning
 * task through the ->list member.
 */
struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
wait_queue_head_t waitq; /* sync: sleep on this q */
struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */
};
/*
* This is the RPC task struct
*/
struct rpc_task {
struct list_head tk_list; /* wait queue links */
#ifdef RPC_DEBUG
unsigned long tk_magic; /* 0xf00baa */
#endif
......@@ -37,7 +46,6 @@ struct rpc_task {
struct rpc_clnt * tk_client; /* RPC client */
struct rpc_rqst * tk_rqstp; /* RPC request */
int tk_status; /* result of last operation */
struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */
/*
* RPC call state
......@@ -70,13 +78,18 @@ struct rpc_task {
* you have a pathological interest in kernel oopses.
*/
struct timer_list tk_timer; /* kernel timer */
wait_queue_head_t tk_wait; /* sync: sleep on this q */
unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */
unsigned char tk_active : 1;/* Task has been activated */
unsigned char tk_priority : 2;/* Task priority */
unsigned long tk_runstate; /* Task run status */
struct list_head tk_links; /* links to related tasks */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
*/
union {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
} u;
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
......@@ -87,11 +100,11 @@ struct rpc_task {
/* support walking a list of tasks on a wait queue */
#define task_for_each(task, pos, head) \
list_for_each(pos, head) \
if ((task=list_entry(pos, struct rpc_task, tk_list)),1)
if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
#define task_for_first(task, head) \
if (!list_empty(head) && \
((task=list_entry((head)->next, struct rpc_task, tk_list)),1))
((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
/* .. and walking list of all tasks */
#define alltask_for_each(task, pos, head) \
......@@ -126,22 +139,39 @@ typedef void (*rpc_action)(struct rpc_task *);
#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
#define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
#define RPC_TASK_SLEEPING 0
#define RPC_TASK_RUNNING 1
#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
#define RPC_TASK_WAKEUP 2
#define RPC_TASK_HAS_TIMER 3
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_test_and_set_running(t) \
(test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
/*
 * Accessors for the RPC_TASK_QUEUED bit of (t)->tk_runstate.
 * RPC_IS_QUEUED() tests the bit and rpc_set_queued() sets it.
 */
#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
/*
 * Clear RPC_TASK_QUEUED, with a memory barrier issued immediately
 * before and immediately after the clear_bit().
 */
#define rpc_clear_queued(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_clear_sleeping(t) \
#define rpc_start_wakeup(t) \
(test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
#define rpc_finish_wakeup(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \
clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while(0)
} while (0)
/*
* Task priorities.
......@@ -157,6 +187,7 @@ typedef void (*rpc_action)(struct rpc_task *);
* RPC synchronization objects
*/
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
unsigned long cookie; /* cookie of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
......@@ -177,6 +208,7 @@ struct rpc_wait_queue {
#ifndef RPC_DEBUG
# define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \
......@@ -185,6 +217,7 @@ struct rpc_wait_queue {
}
#else
# define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \
......@@ -209,13 +242,10 @@ void rpc_killall_tasks(struct rpc_clnt *);
int rpc_execute(struct rpc_task *);
void rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
rpc_action action);
int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
void rpc_remove_wait_queue(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action, rpc_action timer);
void rpc_add_timer(struct rpc_task *, rpc_action);
void rpc_wake_up_task(struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
......
......@@ -214,8 +214,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
struct rpc_cred *entry;
entry = list_entry(pos, struct rpc_cred, cr_hash);
if (entry->cr_flags & RPCAUTH_CRED_DEAD)
continue;
if (rpcauth_prune_expired(entry, &free))
continue;
if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
......@@ -307,9 +305,6 @@ put_rpccred(struct rpc_cred *cred)
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return;
if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
list_del_init(&cred->cr_hash);
if (list_empty(&cred->cr_hash)) {
spin_unlock(&rpc_credcache_lock);
rpcauth_crdestroy(cred);
......@@ -413,10 +408,3 @@ rpcauth_uptodatecred(struct rpc_task *task)
return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
}
int
rpcauth_deadcred(struct rpc_task *task)
{
return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
}
......@@ -480,12 +480,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!cred)
goto err;
if (gss_err)
cred->cr_flags |= RPCAUTH_CRED_DEAD;
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else
gss_cred_set_ctx(cred, ctx);
spin_lock(&gss_auth->lock);
gss_msg = __gss_find_upcall(gss_auth, acred.uid);
if (gss_msg) {
if (gss_err)
gss_msg->msg.errno = -EACCES;
__gss_unhash_msg(gss_msg);
spin_unlock(&gss_auth->lock);
gss_release_msg(gss_msg);
......@@ -740,7 +742,9 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid)
maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT,
&verf_buf, &mic);
if(maj_stat != 0){
if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
} else if (maj_stat != 0) {
printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
goto out_put_ctx;
}
......@@ -779,6 +783,7 @@ gss_validate(struct rpc_task *task, u32 *p)
struct xdr_netobj mic;
u32 flav,len;
u32 service;
u32 maj_stat;
dprintk("RPC: %4u gss_validate\n", task->tk_pid);
......@@ -794,8 +799,11 @@ gss_validate(struct rpc_task *task, u32 *p)
mic.data = (u8 *)p;
mic.len = len;
if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
goto out_bad;
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat)
goto out_bad;
service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
gss_cred->gc_flavor);
switch (service) {
......@@ -821,11 +829,10 @@ gss_validate(struct rpc_task *task, u32 *p)
}
static inline int
gss_wrap_req_integ(struct gss_cl_ctx *ctx,
kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
{
struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
struct xdr_buf *snd_buf = &req->rq_snd_buf;
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf integ_buf;
u32 *integ_len = NULL;
struct xdr_netobj mic;
......@@ -836,7 +843,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
integ_len = p++;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(req->rq_seqno);
*p++ = htonl(rqstp->rq_seqno);
status = encode(rqstp, p, obj);
if (status)
......@@ -858,7 +865,9 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT, &integ_buf, &mic);
status = -EIO; /* XXX? */
if (maj_stat)
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else if (maj_stat)
return status;
q = xdr_encode_opaque(p, NULL, mic.len);
......@@ -894,7 +903,8 @@ gss_wrap_req(struct rpc_task *task,
status = encode(rqstp, p, obj);
goto out;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj);
status = gss_wrap_req_integ(cred, ctx, encode,
rqstp, p, obj);
goto out;
case RPC_GSS_SVC_PRIVACY:
default:
......@@ -907,11 +917,10 @@ gss_wrap_req(struct rpc_task *task,
}
static inline int
gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
kxdrproc_t decode, void *rqstp, u32 **p, void *obj)
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_rqst *rqstp, u32 **p)
{
struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
struct xdr_buf *rcv_buf = &req->rq_rcv_buf;
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct xdr_buf integ_buf;
struct xdr_netobj mic;
u32 data_offset, mic_offset;
......@@ -926,7 +935,7 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len)
return status;
if (ntohl(*(*p)++) != req->rq_seqno)
if (ntohl(*(*p)++) != rqstp->rq_seqno)
return status;
if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
......@@ -938,6 +947,8 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
&mic, NULL);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat != GSS_S_COMPLETE)
return status;
return 0;
......@@ -962,8 +973,7 @@ gss_unwrap_resp(struct rpc_task *task,
case RPC_GSS_SVC_NONE:
goto out_decode;
case RPC_GSS_SVC_INTEGRITY:
status = gss_unwrap_resp_integ(ctx, decode,
rqstp, &p, obj);
status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
if (status)
goto out;
break;
......
......@@ -928,7 +928,7 @@ call_refreshresult(struct rpc_task *task)
task->tk_action = call_reserve;
if (status >= 0 && rpcauth_uptodatecred(task))
return;
if (rpcauth_deadcred(task)) {
if (status == -EACCES) {
rpc_exit(task, -EACCES);
return;
}
......@@ -970,23 +970,31 @@ call_verify(struct rpc_task *task)
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
u32 *p = iov->iov_base, n;
int error = -EACCES;
if ((len -= 3) < 0)
goto garbage;
goto out_overflow;
p += 1; /* skip XID */
if ((n = ntohl(*p++)) != RPC_REPLY) {
printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
goto garbage;
goto out_retry;
}
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
int error = -EACCES;
if (--len < 0)
goto garbage;
if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
} else if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_ERROR:
break;
case RPC_MISMATCH:
printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
goto out_eio;
default:
printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
goto out_eio;
}
if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_REJECTEDCRED:
case RPC_AUTH_REJECTEDVERF:
......@@ -1017,20 +1025,18 @@ call_verify(struct rpc_task *task)
default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
error = -EIO;
} else
goto garbage;
}
dprintk("RPC: %4d call_verify: call rejected %d\n",
task->tk_pid, n);
rpc_exit(task, error);
return NULL;
goto out_err;
}
if (!(p = rpcauth_checkverf(task, p))) {
printk(KERN_WARNING "call_verify: auth check failed\n");
goto garbage; /* bad verifier, retry */
goto out_retry; /* bad verifier, retry */
}
len = p - (u32 *)iov->iov_base - 1;
if (len < 0)
goto garbage;
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_SUCCESS:
return p;
......@@ -1053,23 +1059,28 @@ call_verify(struct rpc_task *task)
task->tk_client->cl_server);
goto out_eio;
case RPC_GARBAGE_ARGS:
dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
break; /* retry */
default:
printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
/* Also retry */
}
garbage:
dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
out_retry:
task->tk_client->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
task->tk_action = call_bind;
return NULL;
}
printk(KERN_WARNING "RPC: garbage, exit EIO\n");
printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
out_eio:
rpc_exit(task, -EIO);
error = -EIO;
out_err:
rpc_exit(task, error);
return NULL;
out_overflow:
printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
goto out_retry;
}
......@@ -25,6 +25,7 @@
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
#define RPC_TASK_MAGIC_ID 0xf00baa
static int rpc_task_id;
#endif
......@@ -41,16 +42,9 @@ static mempool_t *rpc_buffer_mempool;
static void __rpc_default_timer(struct rpc_task *task);
static void rpciod_killall(void);
static void rpc_free(struct rpc_task *task);
/*
* When an asynchronous RPC task is activated within a bottom half
* handler, or while executing another RPC task, it is put on
* schedq, and rpciod is woken up.
*/
static RPC_WAITQ(schedq, "schedq");
static void rpc_async_schedule(void *);
/*
* RPC tasks that create another task (e.g. for contacting the portmapper)
......@@ -71,18 +65,10 @@ static LIST_HEAD(all_tasks);
/*
* rpciod-related stuff
*/
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_COMPLETION(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema);
static unsigned int rpciod_users;
static pid_t rpciod_pid;
static int rpc_inhibit;
static struct workqueue_struct *rpciod_workqueue;
/*
* Spinlock for wait queues. Access to the latter also has to be
* interrupt-safe in order to allow timers to wake up sleeping tasks.
*/
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/*
* Spinlock for other critical sections of code.
*/
......@@ -90,7 +76,7 @@ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
/*
* Disable the timer for a given RPC task. Should be called with
* rpc_queue_lock and bh_disabled in order to avoid races within
* queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer().
*/
static inline void
......@@ -108,19 +94,19 @@ __rpc_disable_timer(struct rpc_task *task)
* without calling del_timer_sync(). The latter could cause a
* deadlock if called while we're holding spinlocks...
*/
static void
rpc_run_timer(struct rpc_task *task)
static void rpc_run_timer(struct rpc_task *task)
{
void (*callback)(struct rpc_task *);
spin_lock_bh(&rpc_queue_lock);
callback = task->tk_timeout_fn;
task->tk_timeout_fn = NULL;
spin_unlock_bh(&rpc_queue_lock);
if (callback) {
if (callback && RPC_IS_QUEUED(task)) {
dprintk("RPC: %4d running timer\n", task->tk_pid);
callback(task);
}
smp_mb__before_clear_bit();
clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
smp_mb__after_clear_bit();
}
/*
......@@ -139,29 +125,21 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer)
task->tk_timeout_fn = timer;
else
task->tk_timeout_fn = __rpc_default_timer;
set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
}
/*
* Set up a timer for an already sleeping task.
*/
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task))
__rpc_add_timer(task, timer);
spin_unlock_bh(&rpc_queue_lock);
}
/*
* Delete any timer for the current task. Because we use del_timer_sync(),
* this function should never be called while holding rpc_queue_lock.
* this function should never be called while holding queue->lock.
*/
static inline void
rpc_delete_timer(struct rpc_task *task)
{
if (del_timer_sync(&task->tk_timer))
if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
del_singleshot_timer_sync(&task->tk_timer);
dprintk("RPC: %4d deleting timer\n", task->tk_pid);
}
}
/*
......@@ -172,16 +150,17 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
struct list_head *q;
struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
q = &queue->tasks[task->tk_priority];
if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, tk_list) {
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_cookie == task->tk_cookie) {
list_add_tail(&task->tk_list, &t->tk_links);
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
return;
}
}
list_add_tail(&task->tk_list, q);
list_add_tail(&task->u.tk_wait.list, q);
}
/*
......@@ -192,37 +171,21 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (task->tk_rpcwait == queue)
return 0;
BUG_ON (RPC_IS_QUEUED(task));
if (task->tk_rpcwait) {
printk(KERN_WARNING "RPC: doubly enqueued task!\n");
return -EWOULDBLOCK;
}
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task);
else if (RPC_IS_SWAPPER(task))
list_add(&task->tk_list, &queue->tasks[0]);
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->tk_list, &queue->tasks[0]);
task->tk_rpcwait = queue;
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->u.tk_wait.rpc_waitq = queue;
rpc_set_queued(task);
dprintk("RPC: %4d added to queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
return 0;
}
int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
int result;
spin_lock_bh(&rpc_queue_lock);
result = __rpc_add_wait_queue(q, task);
spin_unlock_bh(&rpc_queue_lock);
return result;
}
/*
......@@ -232,12 +195,12 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
struct rpc_task *t;
if (!list_empty(&task->tk_links)) {
t = list_entry(task->tk_links.next, struct rpc_task, tk_list);
list_move(&t->tk_list, &task->tk_list);
list_splice_init(&task->tk_links, &t->tk_links);
if (!list_empty(&task->u.tk_wait.links)) {
t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
}
list_del(&task->tk_list);
list_del(&task->u.tk_wait.list);
}
/*
......@@ -246,31 +209,17 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
*/
static void __rpc_remove_wait_queue(struct rpc_task *task)
{
struct rpc_wait_queue *queue = task->tk_rpcwait;
if (!queue)
return;
struct rpc_wait_queue *queue;
queue = task->u.tk_wait.rpc_waitq;
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
else
list_del(&task->tk_list);
task->tk_rpcwait = NULL;
list_del(&task->u.tk_wait.list);
dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
}
void
rpc_remove_wait_queue(struct rpc_task *task)
{
if (!task->tk_rpcwait)
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
}
static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
queue->priority = priority;
......@@ -293,6 +242,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
{
int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]);
queue->maxpriority = maxprio;
......@@ -319,34 +269,31 @@ EXPORT_SYMBOL(rpc_init_wait_queue);
* Note: If the task is ASYNC, this must be called with
* the spinlock held to protect the wait queue operation.
*/
static inline void
rpc_make_runnable(struct rpc_task *task)
static void rpc_make_runnable(struct rpc_task *task)
{
if (task->tk_timeout_fn) {
printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
int do_ret;
BUG_ON(task->tk_timeout_fn);
do_ret = rpc_test_and_set_running(task);
rpc_clear_queued(task);
if (do_ret)
return;
}
rpc_set_running(task);
if (RPC_IS_ASYNC(task)) {
if (RPC_IS_SLEEPING(task)) {
int status;
status = __rpc_add_wait_queue(&schedq, task);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
return;
}
rpc_clear_sleeping(task);
wake_up(&rpciod_idle);
int status;
INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
status = queue_work(task->tk_workqueue, &task->u.tk_work);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
return;
}
} else {
rpc_clear_sleeping(task);
wake_up(&task->tk_wait);
}
} else
wake_up(&task->u.tk_wait.waitq);
}
/*
* Place a newly initialized task on the schedq.
* Place a newly initialized task on the workqueue.
*/
static inline void
rpc_schedule_run(struct rpc_task *task)
......@@ -355,33 +302,18 @@ rpc_schedule_run(struct rpc_task *task)
if (RPC_IS_ACTIVATED(task))
return;
task->tk_active = 1;
rpc_set_sleeping(task);
rpc_make_runnable(task);
}
/*
* For other people who may need to wake the I/O daemon
* but should (for now) know nothing about its innards
*/
void rpciod_wake_up(void)
{
if(rpciod_pid==0)
printk(KERN_ERR "rpciod: wot no daemon?\n");
wake_up(&rpciod_idle);
}
/*
* Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
int status;
dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
rpc_qname(q), jiffies);
......@@ -391,75 +323,66 @@ __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
}
/* Mark the task as being activated if so needed */
if (!RPC_IS_ACTIVATED(task)) {
if (!RPC_IS_ACTIVATED(task))
task->tk_active = 1;
rpc_set_sleeping(task);
}
status = __rpc_add_wait_queue(q, task);
if (status) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
} else {
rpc_clear_running(task);
if (task->tk_callback) {
dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
BUG();
}
task->tk_callback = action;
__rpc_add_timer(task, timer);
}
__rpc_add_wait_queue(q, task);
BUG_ON(task->tk_callback != NULL);
task->tk_callback = action;
__rpc_add_timer(task, timer);
}
void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
/*
* Protect the queue operations.
*/
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&q->lock);
__rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&q->lock);
}
/**
* __rpc_wake_up_task - wake up a single rpc_task
* __rpc_do_wake_up_task - wake up a single rpc_task
* @task: task to be woken up
*
* Caller must hold rpc_queue_lock
* Caller must hold queue->lock, and have cleared the task queued flag.
*/
static void
__rpc_wake_up_task(struct rpc_task *task)
static void __rpc_do_wake_up_task(struct rpc_task *task)
{
dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
task->tk_pid, jiffies, rpc_inhibit);
dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
return;
}
if (RPC_IS_RUNNING(task))
return;
__rpc_disable_timer(task);
if (task->tk_rpcwait != &schedq)
__rpc_remove_wait_queue(task);
__rpc_remove_wait_queue(task);
rpc_make_runnable(task);
dprintk("RPC: __rpc_wake_up_task done\n");
}
/*
* Wake up the specified task
*/
/*
 * Wake @task if it is still queued.  rpc_start_wakeup() claims the
 * RPC_TASK_WAKEUP bit; when the bit was already set this call does
 * nothing.  The bit is released again through rpc_finish_wakeup()
 * whether or not the task turned out to be queued.
 */
static void __rpc_wake_up_task(struct rpc_task *task)
{
	if (!rpc_start_wakeup(task))
		return;

	if (RPC_IS_QUEUED(task))
		__rpc_do_wake_up_task(task);
	rpc_finish_wakeup(task);
}
/*
* Default timeout handler if none specified by user
*/
......@@ -474,14 +397,18 @@ __rpc_default_timer(struct rpc_task *task)
/*
* Wake up the specified task
*/
void
rpc_wake_up_task(struct rpc_task *task)
void rpc_wake_up_task(struct rpc_task *task)
{
if (RPC_IS_RUNNING(task))
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock);
if (rpc_start_wakeup(task)) {
if (RPC_IS_QUEUED(task)) {
struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
spin_lock_bh(&queue->lock);
__rpc_do_wake_up_task(task);
spin_unlock_bh(&queue->lock);
}
rpc_finish_wakeup(task);
}
}
/*
......@@ -497,11 +424,11 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
*/
q = &queue->tasks[queue->priority];
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list);
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
if (queue->cookie == task->tk_cookie) {
if (--queue->nr)
goto out;
list_move_tail(&task->tk_list, q);
list_move_tail(&task->u.tk_wait.list, q);
}
/*
* Check if we need to switch queues.
......@@ -519,7 +446,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
else
q = q - 1;
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list);
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
......@@ -544,14 +471,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
struct rpc_task *task = NULL;
dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
if (RPC_IS_PRIORITY(queue))
task = __rpc_wake_up_next_priority(queue);
else {
task_for_first(task, &queue->tasks[0])
__rpc_wake_up_task(task);
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
return task;
}
......@@ -560,25 +487,25 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
* Grabs rpc_queue_lock
* Grabs queue->lock
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
struct rpc_task *task;
struct list_head *head;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list);
task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
__rpc_wake_up_task(task);
}
if (head == &queue->tasks[0])
break;
head--;
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
}
/**
......@@ -586,18 +513,18 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
* @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set
*
* Grabs rpc_queue_lock
* Grabs queue->lock
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct list_head *head;
struct rpc_task *task;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list);
task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
task->tk_status = status;
__rpc_wake_up_task(task);
}
......@@ -605,7 +532,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break;
head--;
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
}
/*
......@@ -629,21 +556,22 @@ __rpc_atrun(struct rpc_task *task)
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
static int
__rpc_execute(struct rpc_task *task)
static int __rpc_execute(struct rpc_task *task)
{
int status = 0;
dprintk("RPC: %4d rpc_execute flgs %x\n",
task->tk_pid, task->tk_flags);
if (!RPC_IS_RUNNING(task)) {
printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
return 0;
}
BUG_ON(RPC_IS_QUEUED(task));
restarted:
while (1) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
/*
* Execute any pending callback.
*/
......@@ -660,7 +588,9 @@ __rpc_execute(struct rpc_task *task)
*/
save_callback=task->tk_callback;
task->tk_callback=NULL;
lock_kernel();
save_callback(task);
unlock_kernel();
}
/*
......@@ -668,43 +598,35 @@ __rpc_execute(struct rpc_task *task)
* tk_action may be NULL when the task has been killed
* by someone else.
*/
if (RPC_IS_RUNNING(task)) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
if (!RPC_IS_QUEUED(task)) {
if (!task->tk_action)
break;
lock_kernel();
task->tk_action(task);
/* micro-optimization to avoid spinlock */
if (RPC_IS_RUNNING(task))
continue;
unlock_kernel();
}
/*
* Check whether task is sleeping.
* Lockless check for whether task is sleeping or not.
*/
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task)) {
rpc_set_sleeping(task);
if (RPC_IS_ASYNC(task)) {
spin_unlock_bh(&rpc_queue_lock);
if (!RPC_IS_QUEUED(task))
continue;
rpc_clear_running(task);
if (RPC_IS_ASYNC(task)) {
/* Careful! we may have raced... */
if (RPC_IS_QUEUED(task))
return 0;
}
if (rpc_test_and_set_running(task))
return 0;
continue;
}
spin_unlock_bh(&rpc_queue_lock);
if (!RPC_IS_SLEEPING(task))
continue;
/* sync task: sleep here */
dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
if (current->pid == rpciod_pid)
printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
if (RPC_TASK_UNINTERRUPTIBLE(task)) {
__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
} else {
__wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status);
__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
......@@ -718,11 +640,14 @@ __rpc_execute(struct rpc_task *task)
rpc_wake_up_task(task);
}
}
rpc_set_running(task);
dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
}
if (task->tk_exit) {
lock_kernel();
task->tk_exit(task);
unlock_kernel();
/* If tk_action is non-null, the user wants us to restart */
if (task->tk_action) {
if (!RPC_ASSASSINATED(task)) {
......@@ -741,7 +666,6 @@ __rpc_execute(struct rpc_task *task)
/* Release all resources associated with the task */
rpc_release_task(task);
return status;
}
......@@ -757,57 +681,16 @@ __rpc_execute(struct rpc_task *task)
int
rpc_execute(struct rpc_task *task)
{
int status = -EIO;
if (rpc_inhibit) {
printk(KERN_INFO "RPC: execution inhibited!\n");
goto out_release;
}
status = -EWOULDBLOCK;
if (task->tk_active) {
printk(KERN_ERR "RPC: active task was run twice!\n");
goto out_err;
}
BUG_ON(task->tk_active);
task->tk_active = 1;
rpc_set_running(task);
return __rpc_execute(task);
out_release:
rpc_release_task(task);
out_err:
return status;
}
/*
* This is our own little scheduler for async RPC tasks.
*/
static void
__rpc_schedule(void)
static void rpc_async_schedule(void *arg)
{
struct rpc_task *task;
int count = 0;
dprintk("RPC: rpc_schedule enter\n");
while (1) {
task_for_first(task, &schedq.tasks[0]) {
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
__rpc_execute(task);
spin_lock_bh(&rpc_queue_lock);
} else {
break;
}
if (++count >= 200 || need_resched()) {
count = 0;
spin_unlock_bh(&rpc_queue_lock);
schedule();
spin_lock_bh(&rpc_queue_lock);
}
}
dprintk("RPC: rpc_schedule leave\n");
__rpc_execute((struct rpc_task *)arg);
}
/*
......@@ -865,7 +748,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_client = clnt;
task->tk_flags = flags;
task->tk_exit = callback;
init_waitqueue_head(&task->tk_wait);
if (current->uid != current->fsuid || current->gid != current->fsgid)
task->tk_flags |= RPC_TASK_SETUID;
......@@ -876,12 +758,11 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_priority = RPC_PRIORITY_NORMAL;
task->tk_cookie = (unsigned long)current;
INIT_LIST_HEAD(&task->tk_links);
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
/* Initialize workqueue for async tasks */
task->tk_workqueue = rpciod_workqueue;
if (!RPC_IS_ASYNC(task))
init_waitqueue_head(&task->u.tk_wait.waitq);
if (clnt) {
atomic_inc(&clnt->cl_users);
......@@ -892,9 +773,14 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
}
#ifdef RPC_DEBUG
task->tk_magic = 0xf00baa;
task->tk_magic = RPC_TASK_MAGIC_ID;
task->tk_pid = rpc_task_id++;
#endif
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add_tail(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid);
}
......@@ -947,18 +833,12 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
goto out;
}
void
rpc_release_task(struct rpc_task *task)
void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %4d release task\n", task->tk_pid);
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
/* Remove from global task list */
......@@ -966,19 +846,9 @@ rpc_release_task(struct rpc_task *task)
list_del(&task->tk_task);
spin_unlock(&rpc_sched_lock);
/* Protect the execution below. */
spin_lock_bh(&rpc_queue_lock);
/* Disable timer to prevent zombie wakeup */
__rpc_disable_timer(task);
/* Remove from any wait queue we're still on */
__rpc_remove_wait_queue(task);
BUG_ON (RPC_IS_QUEUED(task));
task->tk_active = 0;
spin_unlock_bh(&rpc_queue_lock);
/* Synchronously delete any running timer */
rpc_delete_timer(task);
......@@ -1008,10 +878,9 @@ rpc_release_task(struct rpc_task *task)
* queue 'childq'. If so returns a pointer to the parent.
* Upon failure returns NULL.
*
* Caller must hold rpc_queue_lock
* Caller must hold childq.lock
*/
static inline struct rpc_task *
rpc_find_parent(struct rpc_task *child)
static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
{
struct rpc_task *task, *parent;
struct list_head *le;
......@@ -1024,17 +893,16 @@ rpc_find_parent(struct rpc_task *child)
return NULL;
}
static void
rpc_child_exit(struct rpc_task *child)
static void rpc_child_exit(struct rpc_task *child)
{
struct rpc_task *parent;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&childq.lock);
if ((parent = rpc_find_parent(child)) != NULL) {
parent->tk_status = child->tk_status;
__rpc_wake_up_task(parent);
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&childq.lock);
}
/*
......@@ -1057,22 +925,20 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
return NULL;
}
void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&childq.lock);
/* N.B. Is it possible for the child to have already finished? */
__rpc_sleep_on(&childq, task, func, NULL);
rpc_schedule_run(child);
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&childq.lock);
}
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
*/
void
rpc_killall_tasks(struct rpc_clnt *clnt)
void rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
struct list_head *le;
......@@ -1083,104 +949,28 @@ rpc_killall_tasks(struct rpc_clnt *clnt)
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&rpc_sched_lock);
alltask_for_each(rovr, le, &all_tasks)
alltask_for_each(rovr, le, &all_tasks) {
if (! RPC_IS_ACTIVATED(rovr))
continue;
if (!clnt || rovr->tk_client == clnt) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
rpc_wake_up_task(rovr);
}
}
spin_unlock(&rpc_sched_lock);
}
static DECLARE_MUTEX_LOCKED(rpciod_running);
static inline int
rpciod_task_pending(void)
{
return !list_empty(&schedq.tasks[0]);
}
/*
* This is the rpciod kernel thread
*/
static int
rpciod(void *ptr)
{
int rounds = 0;
lock_kernel();
/*
* Let our maker know we're running ...
*/
rpciod_pid = current->pid;
up(&rpciod_running);
daemonize("rpciod");
allow_signal(SIGKILL);
dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
spin_lock_bh(&rpc_queue_lock);
while (rpciod_users) {
DEFINE_WAIT(wait);
if (signalled()) {
spin_unlock_bh(&rpc_queue_lock);
rpciod_killall();
flush_signals(current);
spin_lock_bh(&rpc_queue_lock);
}
__rpc_schedule();
if (current->flags & PF_FREEZE) {
spin_unlock_bh(&rpc_queue_lock);
refrigerator(PF_FREEZE);
spin_lock_bh(&rpc_queue_lock);
}
if (++rounds >= 64) { /* safeguard */
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
dprintk("RPC: rpciod back to sleep\n");
prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
if (!rpciod_task_pending() && !signalled()) {
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
finish_wait(&rpciod_idle, &wait);
dprintk("RPC: switch to rpciod\n");
}
spin_unlock_bh(&rpc_queue_lock);
dprintk("RPC: rpciod shutdown commences\n");
if (!list_empty(&all_tasks)) {
printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
rpciod_killall();
}
dprintk("RPC: rpciod exiting\n");
unlock_kernel();
rpciod_pid = 0;
complete_and_exit(&rpciod_killer, 0);
return 0;
}
static void
rpciod_killall(void)
static void rpciod_killall(void)
{
unsigned long flags;
while (!list_empty(&all_tasks)) {
clear_thread_flag(TIF_SIGPENDING);
rpc_killall_tasks(NULL);
spin_lock_bh(&rpc_queue_lock);
__rpc_schedule();
spin_unlock_bh(&rpc_queue_lock);
flush_workqueue(rpciod_workqueue);
if (!list_empty(&all_tasks)) {
dprintk("rpciod_killall: waiting for tasks to exit\n");
yield();
......@@ -1198,28 +988,30 @@ rpciod_killall(void)
int
rpciod_up(void)
{
struct workqueue_struct *wq;
int error = 0;
down(&rpciod_sema);
dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
dprintk("rpciod_up: users %d\n", rpciod_users);
rpciod_users++;
if (rpciod_pid)
if (rpciod_workqueue)
goto out;
/*
* If there's no pid, we should be the first user.
*/
if (rpciod_users > 1)
printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
/*
* Create the rpciod thread and wait for it to start.
*/
error = kernel_thread(rpciod, NULL, 0);
if (error < 0) {
printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
error = -ENOMEM;
wq = create_workqueue("rpciod");
if (wq == NULL) {
printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
rpciod_users--;
goto out;
}
down(&rpciod_running);
rpciod_workqueue = wq;
error = 0;
out:
up(&rpciod_sema);
......@@ -1230,20 +1022,21 @@ void
rpciod_down(void)
{
down(&rpciod_sema);
dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
dprintk("rpciod_down sema %d\n", rpciod_users);
if (rpciod_users) {
if (--rpciod_users)
goto out;
} else
printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
printk(KERN_WARNING "rpciod_down: no users??\n");
if (!rpciod_pid) {
if (!rpciod_workqueue) {
dprintk("rpciod_down: Nothing to do!\n");
goto out;
}
rpciod_killall();
kill_proc(rpciod_pid, SIGKILL, 1);
wait_for_completion(&rpciod_killer);
destroy_workqueue(rpciod_workqueue);
rpciod_workqueue = NULL;
out:
up(&rpciod_sema);
}
......@@ -1261,7 +1054,12 @@ void rpc_show_tasks(void)
}
printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
"-rpcwait -action- --exit--\n");
alltask_for_each(t, le, &all_tasks)
alltask_for_each(t, le, &all_tasks) {
const char *rpc_waitq = "none";
if (RPC_IS_QUEUED(t))
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
t->tk_pid,
(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
......@@ -1269,8 +1067,9 @@ void rpc_show_tasks(void)
t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0),
t->tk_rqstp, t->tk_timeout,
rpc_qname(t->tk_rpcwait),
rpc_waitq,
t->tk_action, t->tk_exit);
}
spin_unlock(&rpc_sched_lock);
}
#endif
......
......@@ -371,6 +371,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
do {
/* Are any pointers crossing a page boundary? */
if (pgto_base == 0) {
flush_dcache_page(*pgto);
pgto_base = PAGE_CACHE_SIZE;
pgto--;
}
......@@ -394,6 +395,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
kunmap_atomic(vto, KM_USER0);
} while ((len -= copy) != 0);
flush_dcache_page(*pgto);
}
/*
......@@ -427,12 +429,14 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
pgbase += copy;
if (pgbase == PAGE_CACHE_SIZE) {
flush_dcache_page(*pgto);
pgbase = 0;
pgto++;
}
p += copy;
} while ((len -= copy) != 0);
flush_dcache_page(*pgto);
}
/*
......
......@@ -893,7 +893,8 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
xprt->tcp_flags &= ~XPRT_COPY_XID;
xprt->tcp_flags |= XPRT_COPY_DATA;
xprt->tcp_copied = 4;
dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid);
dprintk("RPC: reading reply for XID %08x\n",
ntohl(xprt->tcp_xid));
tcp_check_recm(xprt);
}
......@@ -913,7 +914,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
if (!req) {
xprt->tcp_flags &= ~XPRT_COPY_DATA;
dprintk("RPC: XID %08x request not found!\n",
xprt->tcp_xid);
ntohl(xprt->tcp_xid));
spin_unlock(&xprt->sock_lock);
return;
}
......@@ -1103,7 +1104,7 @@ xprt_write_space(struct sock *sk)
goto out;
spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
if (xprt->snd_task)
rpc_wake_up_task(xprt->snd_task);
spin_unlock_bh(&xprt->sock_lock);
out:
......@@ -1362,7 +1363,7 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_xprt = xprt;
req->rq_xid = xprt_alloc_xid(xprt);
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
req, req->rq_xid);
req, ntohl(req->rq_xid));
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment