Commit 0d10c2c1 authored by Linus Torvalds

Merge branch 'for-3.17' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "This includes a major rewrite of the NFSv4 state code, which has
  always depended on a single mutex.  As an example, open creates are no
  longer serialized, fixing a performance regression on NFSv3->NFSv4
  upgrades.  Thanks to Jeff, Trond, and Benny, and to Christoph for
  review.

  Also some RDMA fixes from Chuck Lever and Steve Wise, and
  miscellaneous fixes from Kinglong Mee and others"

* 'for-3.17' of git://linux-nfs.org/~bfields/linux: (167 commits)
  svcrdma: remove rdma_create_qp() failure recovery logic
  nfsd: add some comments to the nfsd4 object definitions
  nfsd: remove the client_mutex and the nfs4_lock/unlock_state wrappers
  nfsd: remove nfs4_lock_state: nfs4_state_shutdown_net
  nfsd: remove nfs4_lock_state: nfs4_laundromat
  nfsd: Remove nfs4_lock_state(): reclaim_complete()
  nfsd: Remove nfs4_lock_state(): setclientid, setclientid_confirm, renew
  nfsd: Remove nfs4_lock_state(): exchange_id, create/destroy_session()
  nfsd: Remove nfs4_lock_state(): nfsd4_open and nfsd4_open_confirm
  nfsd: Remove nfs4_lock_state(): nfsd4_delegreturn()
  nfsd: Remove nfs4_lock_state(): nfsd4_open_downgrade + nfsd4_close
  nfsd: Remove nfs4_lock_state(): nfsd4_lock/locku/lockt()
  nfsd: Remove nfs4_lock_state(): nfsd4_release_lockowner
  nfsd: Remove nfs4_lock_state(): nfsd4_test_stateid/nfsd4_free_stateid
  nfsd: Remove nfs4_lock_state(): nfs4_preprocess_stateid_op()
  nfsd: remove old fault injection infrastructure
  nfsd: add more granular locking to *_delegations fault injectors
  nfsd: add more granular locking to forget_openowners fault injector
  nfsd: add more granular locking to forget_locks fault injector
  nfsd: add a list_head arg to nfsd_foreach_client_lock
  ...
parents 023f78b0 d1e458fe
......@@ -47,7 +47,7 @@ struct svc_rqst;
#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
/ sizeof(struct nfs4_ace))
struct nfs4_acl *nfs4_acl_new(int);
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
......
......@@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
validate_process_creds();
/* discard any old override before preparing the new set */
revert_creds(get_cred(current->real_cred));
revert_creds(get_cred(current_real_cred()));
new = prepare_creds();
if (!new)
return -ENOMEM;
......
......@@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
kref_get(&item->ex_client->ref);
new->ex_client = item->ex_client;
new->ex_path.dentry = dget(item->ex_path.dentry);
new->ex_path.mnt = mntget(item->ex_path.mnt);
new->ex_path = item->ex_path;
path_get(&item->ex_path);
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
......@@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p)
return 0;
}
cache_get(&exp->h);
exp_get(exp);
if (cache_check(cd, &exp->h, NULL))
return 0;
exp_put(exp);
......
......@@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp)
cache_put(&exp->h, exp->cd);
}
static inline void exp_get(struct svc_export *exp)
static inline struct svc_export *exp_get(struct svc_export *exp)
{
cache_get(&exp->h);
return exp;
}
struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
......
......@@ -17,81 +17,13 @@
struct nfsd_fault_inject_op {
char *file;
u64 (*forget)(struct nfs4_client *, u64);
u64 (*print)(struct nfs4_client *, u64);
u64 (*get)(void);
u64 (*set_val)(u64);
u64 (*set_clnt)(struct sockaddr_storage *, size_t);
};
static struct nfsd_fault_inject_op inject_ops[] = {
{
.file = "forget_clients",
.forget = nfsd_forget_client,
.print = nfsd_print_client,
},
{
.file = "forget_locks",
.forget = nfsd_forget_client_locks,
.print = nfsd_print_client_locks,
},
{
.file = "forget_openowners",
.forget = nfsd_forget_client_openowners,
.print = nfsd_print_client_openowners,
},
{
.file = "forget_delegations",
.forget = nfsd_forget_client_delegations,
.print = nfsd_print_client_delegations,
},
{
.file = "recall_delegations",
.forget = nfsd_recall_client_delegations,
.print = nfsd_print_client_delegations,
},
};
static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
static struct dentry *debug_dir;
static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
{
u64 count = 0;
if (val == 0)
printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
else
printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
nfs4_lock_state();
count = nfsd_for_n_state(val, op->forget);
nfs4_unlock_state();
printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
}
static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
struct sockaddr_storage *addr,
size_t addr_size)
{
char buf[INET6_ADDRSTRLEN];
struct nfs4_client *clp;
u64 count;
nfs4_lock_state();
clp = nfsd_find_client(addr, addr_size);
if (clp) {
count = op->forget(clp, 0);
rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
}
nfs4_unlock_state();
}
static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
{
nfs4_lock_state();
*val = nfsd_for_n_state(0, op->print);
nfs4_unlock_state();
}
static ssize_t fault_inject_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
......@@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
char read_buf[25];
size_t size;
loff_t pos = *ppos;
struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
if (!pos)
nfsd_inject_get(file_inode(file)->i_private, &val);
val = op->get();
size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
return simple_read_from_buffer(buf, len, ppos, read_buf, size);
......@@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
size_t size = min(sizeof(write_buf) - 1, len);
struct net *net = current->nsproxy->net_ns;
struct sockaddr_storage sa;
struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
u64 val;
char *nl;
if (copy_from_user(write_buf, buf, size))
return -EFAULT;
write_buf[size] = '\0';
/* Deal with any embedded newlines in the string */
nl = strchr(write_buf, '\n');
if (nl) {
size = nl - write_buf;
*nl = '\0';
}
size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
if (size > 0)
nfsd_inject_set_client(file_inode(file)->i_private, &sa, size);
else {
if (size > 0) {
val = op->set_clnt(&sa, size);
if (val)
pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
op->file, write_buf, val);
} else {
val = simple_strtoll(write_buf, NULL, 0);
nfsd_inject_set(file_inode(file)->i_private, val);
if (val == 0)
pr_info("NFSD Fault Injection: %s (all)", op->file);
else
pr_info("NFSD Fault Injection: %s (n = %llu)",
op->file, val);
val = op->set_val(val);
pr_info("NFSD: %s: found %llu", op->file, val);
}
return len; /* on success, claim we got the whole input */
}
......@@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void)
debugfs_remove_recursive(debug_dir);
}
/*
 * Table of fault injection operations, one entry per named control file.
 *
 * fault_inject_read() calls ->get() and reports the returned count.
 * fault_inject_write() calls ->set_clnt() when the written string parses
 * as a client address, and otherwise parses it as a number and passes
 * that to ->set_val().
 */
static struct nfsd_fault_inject_op inject_ops[] = {
{
.file = "forget_clients",
.get = nfsd_inject_print_clients,
.set_val = nfsd_inject_forget_clients,
.set_clnt = nfsd_inject_forget_client,
},
{
.file = "forget_locks",
.get = nfsd_inject_print_locks,
.set_val = nfsd_inject_forget_locks,
.set_clnt = nfsd_inject_forget_client_locks,
},
{
.file = "forget_openowners",
.get = nfsd_inject_print_openowners,
.set_val = nfsd_inject_forget_openowners,
.set_clnt = nfsd_inject_forget_client_openowners,
},
{
.file = "forget_delegations",
.get = nfsd_inject_print_delegations,
.set_val = nfsd_inject_forget_delegations,
.set_clnt = nfsd_inject_forget_client_delegations,
},
{
/* Uses the same ->get() callback as "forget_delegations". */
.file = "recall_delegations",
.get = nfsd_inject_print_delegations,
.set_val = nfsd_inject_recall_delegations,
.set_clnt = nfsd_inject_recall_client_delegations,
},
};
/* Number of entries in inject_ops[]. */
#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
int nfsd_fault_inject_init(void)
{
unsigned int i;
......
......@@ -29,14 +29,19 @@
#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
#define LOCKOWNER_INO_HASH_BITS 8
#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
#define SESSION_HASH_SIZE 512
struct cld_net;
struct nfsd4_client_tracking_ops;
/*
* Represents a nfsd "container". With respect to nfsv4 state tracking, the
* fields of interest are the *_id_hashtbls and the *_name_tree. These track
* the nfs4_client objects by either short or long form clientid.
*
* Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean
* up expired clients and delegations within the container.
*/
struct nfsd_net {
struct cld_net *cld_net;
......@@ -66,8 +71,6 @@ struct nfsd_net {
struct rb_root conf_name_tree;
struct list_head *unconf_id_hashtbl;
struct rb_root unconf_name_tree;
struct list_head *ownerstr_hashtbl;
struct list_head *lockowner_ino_hashtbl;
struct list_head *sessionid_hashtbl;
/*
* client_lru holds client queue ordered by nfs4_client.cl_time
......@@ -97,10 +100,16 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
/* Time of server startup */
struct timeval nfssvc_boot;
/*
* Time of server startup
* Max number of connections this nfsd container will allow. Defaults
* to '0' which means that it bases this on the number of threads.
*/
struct timeval nfssvc_boot;
unsigned int max_connections;
u32 clientid_counter;
struct svc_serv *nfsd_serv;
};
......
......@@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
acl = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl)) {
nfserr = nfserrno(PTR_ERR(acl));
goto fail;
}
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
if (IS_ERR(acl)) {
nfserr = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_access = acl;
}
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
......
......@@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
acl = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl)) {
nfserr = nfserrno(PTR_ERR(acl));
goto fail;
}
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
if (IS_ERR(acl)) {
nfserr = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_access = acl;
}
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
......
......@@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
resp->count = argp->count;
if (max_blocksize < resp->count)
resp->count = max_blocksize;
resp->count = min(argp->count, max_blocksize);
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
......@@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
fh_copy(&resp->dirfh, &argp->ffh);
fh_init(&resp->fh, NFS3_FHSIZE);
nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
argp->tname, argp->tlen,
&resp->fh, &argp->attrs);
argp->tname, &resp->fh);
RETURN_STATUS(nfserr);
}
......
......@@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
iap->ia_valid |= ATTR_SIZE;
p = xdr_decode_hyper(p, &newsize);
if (newsize <= NFS_OFFSET_MAX)
iap->ia_size = newsize;
else
iap->ia_size = NFS_OFFSET_MAX;
iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
}
if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
iap->ia_valid |= ATTR_ATIME;
......@@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
return 0;
p = xdr_decode_hyper(p, &args->offset);
len = args->count = ntohl(*p++);
if (len > max_blocksize)
len = max_blocksize;
args->count = ntohl(*p++);
len = min(args->count, max_blocksize);
/* set up the kvec */
v=0;
......@@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
struct page *p = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(p);
rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
......@@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
}
/* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) {
avail = rqstp->rq_arg.page_len;
if (avail > PAGE_SIZE)
avail = PAGE_SIZE;
avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
old = page_address(rqstp->rq_arg.pages[0]);
}
while (len && avail && *old) {
......@@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p);
......@@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
len = (args->count > max_blocksize) ? max_blocksize :
args->count;
args->count = len;
len = args->count = min(args->count, max_blocksize);
while (len > 0) {
struct page *p = *(rqstp->rq_next_page++);
if (!args->buffer)
......@@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
*/
/* truncate filename if too long */
if (namlen > NFS3_MAXNAMLEN)
namlen = NFS3_MAXNAMLEN;
namlen = min(namlen, NFS3_MAXNAMLEN);
slen = XDR_QUADLEN(namlen);
elen = slen + NFS3_ENTRY_BAGGAGE
......
......@@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
int size = 0;
pacl = get_acl(inode, ACL_TYPE_ACCESS);
if (!pacl) {
if (!pacl)
pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(pacl))
return PTR_ERR(pacl);
}
if (IS_ERR(pacl))
return PTR_ERR(pacl);
/* allocate for worst case: one (deny, allow) pair each: */
size += 2 * pacl->a_count;
if (S_ISDIR(inode->i_mode)) {
flags = NFS4_ACL_DIR;
dpacl = get_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(dpacl)) {
error = PTR_ERR(dpacl);
goto rel_pacl;
}
if (dpacl)
size += 2 * dpacl->a_count;
}
*acl = nfs4_acl_new(size);
*acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL);
if (*acl == NULL) {
error = -ENOMEM;
goto out;
}
(*acl)->naces = 0;
_posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
if (dpacl)
_posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT);
out:
posix_acl_release(pacl);
out:
posix_acl_release(dpacl);
rel_pacl:
posix_acl_release(pacl);
return error;
}
......@@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace)
return -1;
}
struct nfs4_acl *
nfs4_acl_new(int n)
/*
* return the size of the struct nfs4_acl required to represent an acl
* with @entries entries.
*/
int nfs4_acl_bytes(int entries)
{
struct nfs4_acl *acl;
acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
if (acl == NULL)
return NULL;
acl->naces = 0;
return acl;
return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace);
}
static struct {
......@@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
return 0;
}
WARN_ON_ONCE(1);
return -1;
return nfserr_serverfault;
}
......@@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
p = xdr_reserve_space(xdr, 4);
*p++ = xdr_zero; /* truncate */
encode_nfs_fh4(xdr, &dp->dl_fh);
encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
hdr->nops++;
}
......@@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog,
args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
......@@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
args.protocol = XPRT_TRANSPORT_BC_TCP;
args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
......@@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata)
spin_lock(&clp->cl_lock);
list_del(&cb->cb_per_client);
spin_unlock(&clp->cl_lock);
nfs4_put_delegation(dp);
nfs4_put_stid(&dp->dl_stid);
}
}
......@@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
/*
* Note this won't actually result in a null callback;
* instead, nfsd4_do_callback_rpc() will detect the killed
* instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop:
*/
do_probe_callback(clp);
......@@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
run_nfsd4_cb(cb);
}
static void nfsd4_do_callback_rpc(struct work_struct *w)
static void
nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
{
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
......@@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w)
cb->cb_ops, cb);
}
void nfsd4_init_callback(struct nfsd4_callback *cb)
void
nfsd4_run_cb_null(struct work_struct *w)
{
INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
cb_work);
nfsd4_run_callback_rpc(cb);
}
/*
 * Recover the nfsd4_callback that embeds @w as its cb_work member, run the
 * recall preparation step on its cb_op, then pass the callback on to
 * nfsd4_run_callback_rpc().
 */
void
nfsd4_run_cb_recall(struct work_struct *w)
{
	struct nfsd4_callback *callback;

	callback = container_of(w, struct nfsd4_callback, cb_work);
	nfsd4_prepare_cb_recall(callback->cb_op);
	nfsd4_run_callback_rpc(callback);
}
void nfsd4_cb_recall(struct nfs4_delegation *dp)
......
......@@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
fh_put(dst);
dget(src->fh_dentry);
if (src->fh_export)
cache_get(&src->fh_export->h);
exp_get(src->fh_export);
*dst = *src;
}
......@@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (nfsd4_has_session(cstate))
copy_clientid(&open->op_clientid, cstate->session);
nfs4_lock_state();
/* check seqid for replay. set nfs4_owner */
resp = rqstp->rq_resp;
status = nfsd4_process_open1(&resp->cstate, open, nn);
......@@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
status = nfs4_check_open_reclaim(&open->op_clientid,
cstate->minorversion,
nn);
cstate, nn);
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
......@@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* set, (2) sets open->op_stateid, (3) sets open->op_delegation.
*/
status = nfsd4_process_open2(rqstp, resfh, open);
WARN_ON(status && open->op_created);
WARN(status && open->op_created,
"nfsd4_process_open2 failed to open newly-created file! status=%u\n",
be32_to_cpu(status));
out:
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
fh_put(resfh);
kfree(resfh);
}
nfsd4_cleanup_open_state(open, status);
if (open->op_openowner && !nfsd4_has_session(cstate))
cstate->replay_owner = &open->op_openowner->oo_owner;
nfsd4_cleanup_open_state(cstate, open, status);
nfsd4_bump_seqid(cstate, status);
if (!cstate->replay_owner)
nfs4_unlock_state();
return status;
}
......@@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
__be32 verf[2];
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
verf[0] = (__be32)nn->nfssvc_boot.tv_sec;
verf[1] = (__be32)nn->nfssvc_boot.tv_usec;
/*
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy
*/
verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
memcpy(verifier->data, verf, sizeof(verifier->data));
}
......@@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NF4LNK:
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
create->cr_linkname, create->cr_linklen,
&resfh, &create->cr_iattr);
create->cr_data, &resfh);
break;
case NF4BLK:
......@@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
default:
return nfserr_inval;
}
exp_get(cstate->current_fh.fh_export);
sin->sin_exp = cstate->current_fh.fh_export;
sin->sin_exp = exp_get(cstate->current_fh.fh_export);
fh_put(&cstate->current_fh);
return nfs_ok;
}
......@@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
rqstp->rq_usedeferral = 0;
rqstp->rq_usedeferral = false;
/*
* According to RFC3010, this takes precedence over all other errors.
......@@ -1391,10 +1389,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
args->ops, args->opcnt, resp->opcnt, op->opnum,
be32_to_cpu(status));
if (cstate->replay_owner) {
nfs4_unlock_state();
cstate->replay_owner = NULL;
}
nfsd4_cstate_clear_replay(cstate);
/* XXX Ugh, we need to get rid of this kind of special case: */
if (op->opnum == OP_READ && op->u.read.rd_filp)
fput(op->u.read.rd_filp);
......@@ -1408,7 +1403,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
BUG_ON(cstate->replay_owner);
out:
/* Reset deferral mechanism for RPC deferrals */
rqstp->rq_usedeferral = 1;
rqstp->rq_usedeferral = true;
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
......@@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
u32 maxcount = 0, rlen = 0;
maxcount = svc_max_payload(rqstp);
rlen = op->u.read.rd_length;
if (rlen > maxcount)
rlen = maxcount;
rlen = min(op->u.read.rd_length, maxcount);
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount;
u32 maxcount = 0, rlen = 0;
if (rlen > maxcount)
rlen = maxcount;
maxcount = svc_max_payload(rqstp);
rlen = min(op->u.readdir.rd_maxcount, maxcount);
return (op_encode_hdr_size + op_encode_verifier_maxsz +
XDR_QUADLEN(rlen)) * sizeof(__be32);
......
This diff is collapsed.
......@@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid)
}
/**
* defer_free - mark an allocation as deferred freed
* @argp: NFSv4 compound argument structure to be freed with
* @release: release callback to free @p, typically kfree()
* @p: pointer to be freed
* svcxdr_tmpalloc - allocate memory to be freed after compound processing
* @argp: NFSv4 compound argument structure
* @len: number of bytes to allocate
*
* Returns a buffer of @len bytes, or NULL if the allocation fails. The
* buffer is freed (with kfree()) when processing of the compound operation
* described in @argp finishes.
*/
static int
defer_free(struct nfsd4_compoundargs *argp,
void (*release)(const void *), void *p)
static void *
svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
{
struct tmpbuf *tb;
struct svcxdr_tmpbuf *tb;
tb = kmalloc(sizeof(*tb), GFP_KERNEL);
tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
if (!tb)
return -ENOMEM;
tb->buf = p;
tb->release = release;
return NULL;
tb->next = argp->to_free;
argp->to_free = tb;
return 0;
return tb->buf;
}
/*
* For xdr strings that need to be passed to other kernel api's
* as null-terminated strings.
*
* Note null-terminating in place usually isn't safe since the
* buffer might end on a page boundary.
*/
static char *
svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
{
	char *copy;

	/* One extra byte beyond @len for the terminating NUL. */
	copy = svcxdr_tmpalloc(argp, len + 1);
	if (copy) {
		memcpy(copy, buf, len);
		copy[len] = '\0';
	}
	return copy;
}
/**
......@@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp,
*/
static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
{
if (p == argp->tmp) {
p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
if (!p)
return NULL;
} else {
BUG_ON(p != argp->tmpp);
argp->tmpp = NULL;
}
if (defer_free(argp, kfree, p)) {
kfree(p);
void *ret;
ret = svcxdr_tmpalloc(argp, nbytes);
if (!ret)
return NULL;
} else
return (char *)p;
memcpy(ret, p, nbytes);
return ret;
}
static __be32
......@@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (nace > NFS4_ACL_MAX)
return nfserr_fbig;
*acl = nfs4_acl_new(nace);
*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
if (*acl == NULL)
return nfserr_jukebox;
defer_free(argp, kfree, *acl);
(*acl)->naces = nace;
for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
READ_BUF(16); len += 16;
......@@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return nfserr_badlabel;
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
label->data = kzalloc(dummy32 + 1, GFP_KERNEL);
label->len = dummy32;
label->data = svcxdr_dupstr(argp, buf, dummy32);
if (!label->data)
return nfserr_jukebox;
label->len = dummy32;
defer_free(argp, kfree, label->data);
memcpy(label->data, buf, dummy32);
}
#endif
......@@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
switch (create->cr_type) {
case NF4LNK:
READ_BUF(4);
create->cr_linklen = be32_to_cpup(p++);
READ_BUF(create->cr_linklen);
/*
* The VFS will want a null-terminated string, and
* null-terminating in place isn't safe since this might
* end on a page boundary:
*/
create->cr_linkname =
kmalloc(create->cr_linklen + 1, GFP_KERNEL);
if (!create->cr_linkname)
create->cr_datalen = be32_to_cpup(p++);
READ_BUF(create->cr_datalen);
create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
if (!create->cr_data)
return nfserr_jukebox;
memcpy(create->cr_linkname, p, create->cr_linklen);
create->cr_linkname[create->cr_linklen] = '\0';
defer_free(argp, kfree, create->cr_linkname);
break;
case NF4BLK:
case NF4CHR:
......@@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
for (i = 0; i < test_stateid->ts_num_ids; i++) {
stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL);
stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
if (!stateid) {
status = nfserrno(-ENOMEM);
goto out;
}
defer_free(argp, kfree, stateid);
INIT_LIST_HEAD(&stateid->ts_id_list);
list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
......@@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
goto xdr_error;
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
if (!argp->ops) {
argp->ops = argp->iops;
dprintk("nfsd: couldn't allocate room for COMPOUND\n");
......@@ -3077,11 +3072,8 @@ static __be32 nfsd4_encode_splice_read(
__be32 nfserr;
__be32 *p = xdr->p - 2;
/*
* Don't inline pages unless we know there's room for eof,
* count, and possible padding:
*/
if (xdr->end - xdr->p < 3)
/* Make sure there will be room for padding if needed */
if (xdr->end - xdr->p < 1)
return nfserr_resource;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
......@@ -3147,9 +3139,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
len = maxcount;
v = 0;
thislen = (void *)xdr->end - (void *)xdr->p;
if (len < thislen)
thislen = len;
thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
p = xdr_reserve_space(xdr, (thislen+3)&~3);
WARN_ON_ONCE(!p);
resp->rqstp->rq_vec[v].iov_base = p;
......@@ -3216,10 +3206,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
xdr_commit_encode(xdr);
maxcount = svc_max_payload(resp->rqstp);
if (maxcount > xdr->buf->buflen - xdr->buf->len)
maxcount = xdr->buf->buflen - xdr->buf->len;
if (maxcount > read->rd_length)
maxcount = read->rd_length;
maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (!read->rd_filp) {
err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
......@@ -3937,8 +3925,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
*
* XDR note: do not encode rp->rp_buflen: the buffer contains the
* previously sent already encoded operation.
*
* called with nfs4_lock_state() held
*/
void
nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
......@@ -3977,9 +3963,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
kfree(args->tmpp);
args->tmpp = NULL;
while (args->to_free) {
struct tmpbuf *tb = args->to_free;
struct svcxdr_tmpbuf *tb = args->to_free;
args->to_free = tb->next;
tb->release(tb->buf);
kfree(tb);
}
return 1;
......@@ -4012,7 +3997,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
/*
* All that remains is to write the tag and operation count...
*/
struct nfsd4_compound_state *cs = &resp->cstate;
struct xdr_buf *buf = resp->xdr.buf;
WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
......@@ -4026,19 +4010,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
p += XDR_QUADLEN(resp->taglen);
*p++ = htonl(resp->opcnt);
if (nfsd4_has_session(cs)) {
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfs4_client *clp = cs->session->se_client;
if (cs->status != nfserr_replay_cache) {
nfsd4_store_cache_entry(resp);
cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
}
/* Renew the clientid on success and on replay */
spin_lock(&nn->client_lock);
nfsd4_put_session(cs->session);
spin_unlock(&nn->client_lock);
put_client_renew(clp);
}
nfsd4_sequence_done(resp);
return 1;
}
......
......@@ -221,7 +221,12 @@ static void
hash_refile(struct svc_cacherep *rp)
{
hlist_del_init(&rp->c_hash);
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
/*
* No point in byte swapping c_xid since we're just using it to pick
* a hash bucket.
*/
hlist_add_head(&rp->c_hash, cache_hash +
hash_32((__force u32)rp->c_xid, maskbits));
}
/*
......@@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
struct hlist_head *rh;
unsigned int entries = 0;
rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)];
/*
* No point in byte swapping rq_xid since we're just using it to pick
* a hash bucket.
*/
rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
hlist_for_each_entry(rp, rh, c_hash) {
++entries;
if (nfsd_cache_match(rqstp, csum, rp)) {
......
......@@ -39,6 +39,7 @@ enum {
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
NFSD_MaxConnections,
NFSD_SupportedEnctypes,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
......@@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
......@@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
[NFSD_MaxBlkSize] = write_maxblksize,
[NFSD_MaxConnections] = write_maxconn,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
......@@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
if (maxsize < NFS_FHSIZE)
return -EINVAL;
if (maxsize > NFS3_FHSIZE)
maxsize = NFS3_FHSIZE;
maxsize = min(maxsize, NFS3_FHSIZE);
if (qword_get(&mesg, mesg, size)>0)
return -EINVAL;
......@@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
/* force bsize into allowed range and
* required alignment.
*/
if (bsize < 1024)
bsize = 1024;
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize = max_t(int, bsize, 1024);
bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE);
bsize &= ~(1024-1);
mutex_lock(&nfsd_mutex);
if (nn->nfsd_serv) {
......@@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
/**
* write_maxconn - Set or report the current max number of connections
*
* Input:
* buf: ignored
* size: zero
* OR
*
* Input:
* buf: C string containing an unsigned
* integer value representing the new
* number of max connections
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
* containing numeric value of max_connections setting
* for this net namespace;
* return code is the size in bytes of the string
* On error: return code is zero or a negative errno value
*/
static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
{
	struct net *net = file->f_dentry->d_sb->s_fs_info;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	unsigned int limit = nn->max_connections;
	char *cursor = buf;

	/*
	 * A zero-length write only reports the current value; anything
	 * else is parsed as the new limit and stored before reporting.
	 */
	if (size != 0) {
		int err = get_uint(&cursor, &limit);

		/* Hand any non-zero get_uint() result straight back. */
		if (err != 0)
			return err;
		nn->max_connections = limit;
	}

	/* Reply in @buf with the (possibly just updated) value. */
	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", limit);
}
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time_t *time, struct nfsd_net *nn)
......@@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
......
......@@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
/* deprecated, convert to type 3 */
len = key_len(FSID_ENCODE_DEV)/4;
fh->fh_fsid_type = FSID_ENCODE_DEV;
fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
/*
* struct knfsd_fh uses host-endian fields, which are
* sometimes used to hold net-endian values. This
* confuses sparse, so we must use __force here to
* keep it from complaining.
*/
fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
ntohl((__force __be32)fh->fh_fsid[1])));
fh->fh_fsid[1] = fh->fh_fsid[2];
}
data_left -= len;
......@@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
dentry);
fhp->fh_dentry = dget(dentry); /* our internal copy */
fhp->fh_export = exp;
cache_get(&exp->h);
fhp->fh_export = exp_get(exp);
if (fhp->fh_handle.fh_version == 0xca) {
/* old style filehandle please */
......
......@@ -73,8 +73,15 @@ enum fsid_source {
extern enum fsid_source fsid_source(struct svc_fh *fhp);
/* This might look a little large to "inline" but in all calls except
/*
* This might look a little large to "inline" but in all calls except
* one, 'vers' is constant so most of the function disappears.
*
* In some cases the values are considered to be host endian and in
* others, net endian. fsidv is always considered to be u32 as the
* callers don't know which it will be. So we must use __force to keep
* sparse from complaining. Since these values are opaque to the
* client, that shouldn't be a problem.
*/
static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
u32 fsid, unsigned char *uuid)
......@@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
u32 *up;
switch(vers) {
case FSID_DEV:
fsidv[0] = htonl((MAJOR(dev)<<16) |
fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) |
MINOR(dev));
fsidv[1] = ino_t_to_u32(ino);
break;
......@@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
fsidv[0] = fsid;
break;
case FSID_MAJOR_MINOR:
fsidv[0] = htonl(MAJOR(dev));
fsidv[1] = htonl(MINOR(dev));
fsidv[0] = (__force __u32)htonl(MAJOR(dev));
fsidv[1] = (__force __u32)htonl(MINOR(dev));
fsidv[2] = ino_t_to_u32(ino);
break;
......
......@@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
fh_init(&newfh, NFS_FHSIZE);
/*
* Create the link, look up new file and set attrs.
* Crazy hack: the request fits in a page, and already-decoded
* attributes follow argp->tname, so it's safe to just write a
* null to ensure it's null-terminated:
*/
argp->tname[argp->tlen] = '\0';
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, argp->tlen,
&newfh, &argp->attrs);
argp->tname, &newfh);
fh_put(&argp->ffh);
fh_put(&newfh);
......@@ -716,6 +717,7 @@ nfserrno (int errno)
{ nfserr_noent, -ENOENT },
{ nfserr_io, -EIO },
{ nfserr_nxio, -ENXIO },
{ nfserr_fbig, -E2BIG },
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
......@@ -743,6 +745,7 @@ nfserrno (int errno)
{ nfserr_notsupp, -EOPNOTSUPP },
{ nfserr_toosmall, -ETOOSMALL },
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
};
int i;
......@@ -750,7 +753,7 @@ nfserrno (int errno)
if (nfs_errtbl[i].syserr == errno)
return nfs_errtbl[i].nfserr;
}
printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno);
WARN(1, "nfsd: non-standard errno: %d\n", errno);
return nfserr_io;
}
......@@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs)
*/
ret = nfsd_racache_init(2*nrservs);
if (ret)
return ret;
goto dec_users;
ret = nfs4_state_start();
if (ret)
goto out_racache;
......@@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs)
out_racache:
nfsd_racache_shutdown();
dec_users:
nfsd_users--;
return ret;
}
......@@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net)
if (nn->nfsd_serv == NULL)
return -ENOMEM;
nn->nfsd_serv->sv_maxconn = nn->max_connections;
error = svc_bind(nn->nfsd_serv, net);
if (error < 0) {
svc_destroy(nn->nfsd_serv);
......@@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
/* enforce a global maximum number of threads */
tot = 0;
for (i = 0; i < n; i++) {
if (nthreads[i] > NFSD_MAXSERVS)
nthreads[i] = NFSD_MAXSERVS;
nthreads[i] = min(nthreads[i], NFSD_MAXSERVS);
tot += nthreads[i];
}
if (tot > NFSD_MAXSERVS) {
......@@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net)
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
if (nrservs <= 0)
nrservs = 0;
if (nrservs > NFSD_MAXSERVS)
nrservs = NFSD_MAXSERVS;
nrservs = max(nrservs, 0);
nrservs = min(nrservs, NFSD_MAXSERVS);
error = 0;
if (nrservs == 0 && nn->nfsd_serv == NULL)
goto out;
......@@ -564,6 +567,7 @@ nfsd(void *vrqstp)
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int err;
/* Lock module and set up kernel thread */
......@@ -597,6 +601,9 @@ nfsd(void *vrqstp)
* The main request loop
*/
for (;;) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
/*
* Find a socket with data available and call its
* recvfrom routine.
......
......@@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
if (len > NFSSVC_MAXBLKSIZE_V2)
len = NFSSVC_MAXBLKSIZE_V2;
len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
......@@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
struct page *p = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(p);
rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
......@@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
return 0;
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
if (args->count > PAGE_SIZE)
args->count = PAGE_SIZE;
args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p);
......@@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name,
}
if (cd->offset)
*cd->offset = htonl(offset);
if (namlen > NFS2_MAXNAMLEN)
namlen = NFS2_MAXNAMLEN;/* truncate filename */
/* truncate filename */
namlen = min(namlen, NFS2_MAXNAMLEN);
slen = XDR_QUADLEN(namlen);
if ((buflen = cd->buflen - slen - 4) < 0) {
cd->common.err = nfserr_toosmall;
return -EINVAL;
......
This diff is collapsed.
......@@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
dparent = fhp->fh_dentry;
exp = fhp->fh_export;
exp_get(exp);
exp = exp_get(fhp->fh_export);
/* Lookup the name, but don't follow links */
if (isdotent(name, len)) {
......@@ -464,7 +463,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
err = nfserrno(commit_metadata(fhp));
out:
return err;
}
......@@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
static __be32
nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
......@@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
return nfserrno(host_err);
}
int nfsd_splice_read(struct svc_rqst *rqstp,
__be32 nfsd_splice_read(struct svc_rqst *rqstp,
struct file *file, loff_t offset, unsigned long *count)
{
struct splice_desc sd = {
......@@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp,
return nfsd_finish_read(file, count, host_err);
}
int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
mm_segment_t oldfs;
......@@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
return 0;
/* Callers expect file metadata to be committed here */
return nfserrno(commit_metadata(resfhp));
}
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
......@@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, resfhp, iap);
/*
* nfsd_setattr already committed the child. Transactional filesystems
* had a chance to commit changes for both parent and child
* simultaneously making the following commit_metadata a noop.
* nfsd_create_setattr already committed the child. Transactional
* filesystems had a chance to commit changes for both parent and
* child simultaneously making the following commit_metadata a
* noop.
*/
err2 = nfserrno(commit_metadata(fhp));
if (err2)
......@@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, resfhp, iap);
/*
* nfsd_setattr already committed the child (and possibly also the parent).
* nfsd_create_setattr already committed the child
* (and possibly also the parent).
*/
if (!err)
err = nfserrno(commit_metadata(fhp));
......@@ -1504,16 +1507,15 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
__be32
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen,
char *path, int plen,
struct svc_fh *resfhp,
struct iattr *iap)
char *path,
struct svc_fh *resfhp)
{
struct dentry *dentry, *dnew;
__be32 err, cerr;
int host_err;
err = nfserr_noent;
if (!flen || !plen)
if (!flen || path[0] == '\0')
goto out;
err = nfserr_exist;
if (isdotent(fname, flen))
......@@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (IS_ERR(dnew))
goto out_nfserr;
if (unlikely(path[plen] != 0)) {
char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
if (path_alloced == NULL)
host_err = -ENOMEM;
else {
strncpy(path_alloced, path, plen);
path_alloced[plen] = 0;
host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
kfree(path_alloced);
}
} else
host_err = vfs_symlink(dentry->d_inode, dnew, path);
host_err = vfs_symlink(dentry->d_inode, dnew, path);
err = nfserrno(host_err);
if (!err)
err = nfserrno(commit_metadata(fhp));
......@@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size)
if (raparm_hash[0].pb_head)
return 0;
nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
if (nperbucket < 2)
nperbucket = 2;
nperbucket = max(2, nperbucket);
cache_size = nperbucket * RAPARM_HASH_SIZE;
dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
......
......@@ -74,9 +74,9 @@ struct raparms;
__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
struct file **, struct raparms **);
void nfsd_put_tmp_read_open(struct file *, struct raparms *);
int nfsd_splice_read(struct svc_rqst *,
__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
int nfsd_readv(struct file *, loff_t, struct kvec *, int,
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
......@@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
char *name, int len, char *path, int plen,
struct svc_fh *res, struct iattr *);
char *name, int len, char *path,
struct svc_fh *res);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
__be32 nfsd_rename(struct svc_rqst *,
......
......@@ -55,6 +55,7 @@ struct nfsd4_compound_state {
struct svc_fh current_fh;
struct svc_fh save_fh;
struct nfs4_stateowner *replay_owner;
struct nfs4_client *clp;
/* For sessions DRC */
struct nfsd4_session *session;
struct nfsd4_slot *slot;
......@@ -107,8 +108,8 @@ struct nfsd4_create {
u32 cr_type; /* request */
union { /* request */
struct {
u32 namelen;
char *name;
u32 datalen;
char *data;
} link; /* NF4LNK */
struct {
u32 specdata1;
......@@ -121,8 +122,8 @@ struct nfsd4_create {
struct nfs4_acl *cr_acl;
struct xdr_netobj cr_label;
};
#define cr_linklen u.link.namelen
#define cr_linkname u.link.name
#define cr_datalen u.link.datalen
#define cr_data u.link.data
#define cr_specdata1 u.dev.specdata1
#define cr_specdata2 u.dev.specdata2
......@@ -478,6 +479,14 @@ struct nfsd4_op {
bool nfsd4_cache_this_op(struct nfsd4_op *);
/*
* Memory needed just for the duration of processing one compound:
*/
struct svcxdr_tmpbuf {
/*
 * Link to another svcxdr_tmpbuf. Presumably this chains the buffers
 * hanging off nfsd4_compoundargs.to_free -- confirm against the
 * allocation/release code, which is not shown here.
 */
struct svcxdr_tmpbuf *next;
/*
 * Flexible array member: the buffer's bytes live inline, directly
 * after the header, so the allocator decides how many there are.
 */
char buf[];
};
struct nfsd4_compoundargs {
/* scratch variables for XDR decode */
__be32 * p;
......@@ -486,11 +495,7 @@ struct nfsd4_compoundargs {
int pagelen;
__be32 tmp[8];
__be32 * tmpp;
struct tmpbuf {
struct tmpbuf *next;
void (*release)(const void *);
void *buf;
} *to_free;
struct svcxdr_tmpbuf *to_free;
struct svc_rqst *rqstp;
......@@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid_confirm *setclientid_confirm);
extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
......@@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *,
extern __be32 nfsd4_sequence(struct svc_rqst *,
struct nfsd4_compound_state *,
struct nfsd4_sequence *);
extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
extern __be32 nfsd4_destroy_session(struct svc_rqst *,
struct nfsd4_compound_state *,
struct nfsd4_destroy_session *);
......@@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
struct nfsd4_open *open, struct nfsd_net *nn);
extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
struct svc_fh *current_fh, struct nfsd4_open *open);
extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status);
extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open, __be32 status);
extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
extern __be32 nfsd4_close(struct svc_rqst *rqstp,
......@@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
#endif
/*
......
......@@ -258,6 +258,15 @@ static inline void put_cred(const struct cred *_cred)
#define current_cred() \
rcu_dereference_protected(current->cred, 1)
/**
* current_real_cred - Access the current task's objective credentials
*
* Access the objective credentials of the current task. RCU-safe,
* since nobody else can modify it.
*/
#define current_real_cred() \
rcu_dereference_protected(current->real_cred, 1)
/**
* __task_cred - Access a task's objective credentials
* @task: The task to query
......
......@@ -236,7 +236,7 @@ struct svc_rqst {
struct svc_cred rq_cred; /* auth info */
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
int rq_usedeferral; /* use deferral */
bool rq_usedeferral; /* use deferral */
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
......@@ -277,7 +277,7 @@ struct svc_rqst {
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
int rq_cachetype;
struct svc_cacherep * rq_cacherep; /* cache info */
int rq_splice_ok; /* turned off in gss privacy
bool rq_splice_ok; /* turned off in gss privacy
* to prevent encrypting page
* cache pages */
wait_queue_head_t rq_wait; /* synchronization */
......
......@@ -174,8 +174,7 @@ struct svcxprt_rdma {
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
#define RPCRDMA_ORD (64/4)
#define RPCRDMA_SQ_DEPTH_MULT 8
#define RPCRDMA_MAX_THREADS 16
#define RPCRDMA_MAX_REQUESTS 16
#define RPCRDMA_MAX_REQUESTS 32
#define RPCRDMA_MAX_REQ_SIZE 4096
/* svc_rdma_marshal.c */
......
......@@ -25,6 +25,7 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
void (*xpo_adjust_wspace)(struct svc_xprt *);
};
struct svc_xprt_class {
......@@ -33,6 +34,7 @@ struct svc_xprt_class {
struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list;
u32 xcl_max_payload;
int xcl_ident;
};
/*
......
......@@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
u32 priv_len, maj_stat;
int pad, saved_len, remaining_len, offset;
rqstp->rq_splice_ok = 0;
rqstp->rq_splice_ok = false;
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
......
......@@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1;
rqstp->rq_splice_ok = true;
/* Will be turned off only when NFSv4 Sessions are used */
rqstp->rq_usedeferral = 1;
rqstp->rq_usedeferral = true;
rqstp->rq_dropme = false;
/* Setup reply header */
......
......@@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(unsigned long closure);
static void svc_delete_xprt(struct svc_xprt *xprt);
static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
......@@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
return;
/* As soon as we clear busy, the xprt could be closed and
* 'put', so we need a reference to call svc_xprt_enqueue with:
* 'put', so we need a reference to call svc_xprt_do_enqueue with:
*/
svc_xprt_get(xprt);
smp_mb__before_atomic();
clear_bit(XPT_BUSY, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
svc_xprt_do_enqueue(xprt);
svc_xprt_put(xprt);
}
......@@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
return false;
}
/*
* Queue up a transport with data pending. If there are idle nfsd
* processes, wake 'em up.
*
*/
void svc_xprt_enqueue(struct svc_xprt *xprt)
static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
struct svc_rqst *rqstp;
......@@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
out_unlock:
spin_unlock_bh(&pool->sp_lock);
}
/*
* Queue up a transport with data pending. If there are idle nfsd
* processes, wake 'em up.
*
*/
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
	/*
	 * This entry point does nothing for a transport that is still
	 * marked XPT_BUSY; only non-busy transports are passed on to
	 * svc_xprt_do_enqueue().
	 */
	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
		svc_xprt_do_enqueue(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
/*
......@@ -439,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
if (xprt->xpt_ops->xpo_adjust_wspace)
xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
......
......@@ -446,15 +446,43 @@ static void svc_write_space(struct sock *sk)
}
}
/*
 * Report whether the TCP transport has room to send another reply.
 * Returns 1 when there is enough space, otherwise sets SOCK_NOSPACE on
 * the socket and returns 0.
 */
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
	int required;

	/* A listener transport is always reported as having space. */
	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
		return 1;

	/* Space already reserved plus one maximum-sized message. */
	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
	if (sk_stream_wspace(svsk->sk_sk) >= required)
		return 1;

	/* Also fine when the minimum write space is 0 and nothing is reserved. */
	if (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
	    atomic_read(&xprt->xpt_reserved) == 0)
		return 1;

	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	return 0;
}
static void svc_tcp_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
struct socket *sock = sk->sk_socket;
if (sk_stream_is_writeable(sk) && sock)
if (!sk_stream_is_writeable(sk) || !sock)
return;
if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
/*
 * Re-evaluate write space for the transport and drop SOCK_NOSPACE when
 * svc_tcp_has_wspace() reports that there is room again.
 */
static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

	/* On failure svc_tcp_has_wspace() has already set SOCK_NOSPACE. */
	if (!svc_tcp_has_wspace(xprt))
		return;

	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
......@@ -692,6 +720,7 @@ static struct svc_xprt_class svc_udp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_udp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
.xcl_ident = XPRT_TRANSPORT_UDP,
};
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
......@@ -1197,23 +1226,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
svc_putnl(resv, 0);
}
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int required;
if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
return 1;
required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
if (sk_stream_wspace(svsk->sk_sk) >= required ||
(sk_stream_min_wspace(svsk->sk_sk) == 0 &&
atomic_read(&xprt->xpt_reserved) == 0))
return 1;
set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
return 0;
}
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
......@@ -1285,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
.xpo_adjust_wspace = svc_tcp_adjust_wspace,
};
static struct svc_xprt_class svc_tcp_class = {
......@@ -1292,6 +1305,7 @@ static struct svc_xprt_class svc_tcp_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_tcp_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
.xcl_ident = XPRT_TRANSPORT_TCP,
};
void svc_init_xprt_sock(void)
......
......@@ -509,7 +509,8 @@ void xdr_commit_encode(struct xdr_stream *xdr)
}
EXPORT_SYMBOL_GPL(xdr_commit_encode);
__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
size_t nbytes)
{
static __be32 *p;
int space_left;
......
......@@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
}
}
spin_unlock(&xprt_list_lock);
printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
dprintk("RPC: transport (%d) not supported\n", args->ident);
return ERR_PTR(-EIO);
found:
......
......@@ -43,6 +43,7 @@
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
......@@ -435,6 +436,32 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
return ret;
}
/*
* To avoid a separate RDMA READ just for a handful of zero bytes,
* RFC 5666 section 3.7 allows the client to omit the XDR zero pad
* in chunk lists.
*/
static void
rdma_fix_xdr_pad(struct xdr_buf *buf)
{
	unsigned int len = buf->page_len;
	/* Bytes needed to round the page data up to a 4-byte XDR quad. */
	unsigned int pad = (XDR_QUADLEN(len) << 2) - len;
	char *page;

	/* Already quad-aligned: nothing to append. */
	if (pad == 0)
		return;

	/* Zero the pad bytes right after the last byte of page data. */
	page = page_address(buf->pages[len >> PAGE_SHIFT]);
	memset(page + (len & ~PAGE_MASK), 0, pad);

	/* Account for the added pad in every length the buffer tracks. */
	buf->page_len += pad;
	buf->buflen += pad;
	buf->len += pad;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head)
{
......@@ -449,6 +476,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_pages[page_no] = head->pages[page_no];
}
/* Point rq_arg.pages past header */
rdma_fix_xdr_pad(&head->arg);
rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
rqstp->rq_arg.page_len = head->arg.page_len;
rqstp->rq_arg.page_base = head->arg.page_base;
......
......@@ -192,6 +192,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
xdr_sge_no++;
BUG_ON(xdr_sge_no > vec->count);
bc -= sge_bytes;
if (sge_no == xprt->sc_max_sge)
break;
}
/* Prepare WRITE WR */
......@@ -209,7 +211,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
atomic_inc(&rdma_stat_write);
if (svc_rdma_send(xprt, &write_wr))
goto err;
return 0;
return write_len - bc;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
......@@ -225,7 +227,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
{
u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len;
int write_len;
int max_write;
u32 xdr_off;
int chunk_off;
int chunk_no;
......@@ -239,8 +240,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
......@@ -260,23 +259,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
write_len);
chunk_off = 0;
while (write_len) {
int this_write;
this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
ntohl(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
this_write,
write_len,
vec);
if (ret) {
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += this_write;
xdr_off += this_write;
xfer_len -= this_write;
write_len -= this_write;
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
......@@ -293,7 +290,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
{
u32 xfer_len = rqstp->rq_res.len;
int write_len;
int max_write;
u32 xdr_off;
int chunk_no;
int chunk_off;
......@@ -311,8 +307,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
......@@ -330,24 +324,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
write_len);
chunk_off = 0;
while (write_len) {
int this_write;
this_write = min(write_len, max_write);
ret = send_write(xprt, rqstp,
ntohl(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
this_write,
write_len,
vec);
if (ret) {
if (ret <= 0) {
dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n",
ret);
return -EIO;
}
chunk_off += this_write;
xdr_off += this_write;
xfer_len -= this_write;
write_len -= this_write;
chunk_off += ret;
xdr_off += ret;
xfer_len -= ret;
write_len -= ret;
}
}
/* Update the req with the number of chunks actually used */
......
......@@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
......@@ -942,23 +943,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
/*
* XXX: This is a hack. We need a xx_request_qp interface
* that will adjust the qp_attr's with a best-effort
* number
*/
qp_attr.cap.max_send_sge -= 2;
qp_attr.cap.max_recv_sge -= 2;
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd,
&qp_attr);
if (ret) {
dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
goto errout;
}
newxprt->sc_max_sge = qp_attr.cap.max_send_sge;
newxprt->sc_max_sge = qp_attr.cap.max_recv_sge;
newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment