Commit c85b03ab authored by J. Bruce Fields's avatar J. Bruce Fields

Merge Trond's nfs-for-next

Merging Trond's nfs-for-next branch, mainly to get
b7993ceb "SUNRPC: Allow rpc_create() to
request that TCP slots be unlimited", which a small piece of the
gss-proxy work depends on.
parents bf8d9097 fd068b20
......@@ -144,6 +144,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
timeout);
if (ret < 0)
return -ERESTARTSYS;
/* Reset the lock status after a server reboot so we resend */
if (block->b_status == nlm_lck_denied_grace_period)
block->b_status = nlm_lck_blocked;
req->a_res.status = block->b_status;
return 0;
}
......
......@@ -550,9 +550,6 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
if (status < 0)
break;
/* Resend the blocking lock request after a server reboot */
if (resp->status == nlm_lck_denied_grace_period)
continue;
if (resp->status != nlm_lck_blocked)
break;
}
......
......@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
bl_pipe_msg.bl_wq = &nn->bl_wq;
memset(msg, 0, sizeof(*msg));
msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
msg->len = sizeof(bl_msg) + bl_msg.totallen;
msg->data = kzalloc(msg->len, GFP_NOFS);
if (!msg->data)
goto out;
......@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
memcpy(msg->data, &bl_msg, sizeof(bl_msg));
dataptr = (uint8_t *) msg->data;
memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
msg->len = sizeof(bl_msg) + bl_msg.totallen;
add_wait_queue(&nn->bl_wq, &wq);
if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
......
......@@ -125,6 +125,9 @@ nfs41_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
continue;
prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
......
......@@ -500,7 +500,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
&args->craa_type_mask))
pnfs_recall_all_layouts(cps->clp);
if (flags)
nfs_expire_all_delegation_types(cps->clp, flags);
nfs_expire_unused_delegation_types(cps->clp, flags);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
......
......@@ -593,6 +593,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
......
......@@ -64,17 +64,15 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
return ret;
}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
int status = 0;
if (inode->i_flock == NULL)
return 0;
if (inode->i_flock == NULL)
goto out;
/* Protect inode->i_flock using the file locks lock */
lock_flocks();
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
......@@ -83,7 +81,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
if (nfs_file_open_context(fl->fl_file) != ctx)
continue;
unlock_flocks();
status = nfs4_lock_delegation_recall(state, fl);
status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0)
goto out;
lock_flocks();
......@@ -120,7 +118,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
if (!err)
err = nfs_delegation_claim_locks(ctx, state);
err = nfs_delegation_claim_locks(ctx, state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
......@@ -389,6 +387,24 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
return err;
}
static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
struct inode *inode;
spin_lock(&delegation->lock);
inode = delegation->inode;
if (inode && list_empty(&NFS_I(inode)->open_files))
ret = true;
spin_unlock(&delegation->lock);
}
return ret;
}
/**
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
......@@ -411,8 +427,7 @@ int nfs_client_return_marked_delegations(struct nfs_client *clp)
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
&delegation->flags))
if (!nfs_delegation_need_return(delegation))
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
......@@ -471,6 +486,13 @@ int nfs4_inode_return_delegation(struct inode *inode)
return err;
}
static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
......@@ -478,6 +500,45 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
bool ret = false;
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
nfs_mark_return_delegation(server, delegation);
ret = true;
}
return ret;
}
static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
}
static void nfs_delegation_run_state_manager(struct nfs_client *clp)
{
if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
nfs4_schedule_state_manager(clp);
}
/**
* nfs_expire_all_delegations
* @clp: client to process
*
*/
void nfs_expire_all_delegations(struct nfs_client *clp)
{
nfs_client_mark_return_all_delegations(clp);
nfs_delegation_run_state_manager(clp);
}
/**
* nfs_super_return_all_delegations - return delegations for one superblock
* @sb: sb to process
......@@ -486,24 +547,22 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
void nfs_server_return_all_delegations(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
bool need_wait;
if (clp == NULL)
return;
rcu_read_lock();
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
spin_lock(&delegation->lock);
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
spin_unlock(&delegation->lock);
}
need_wait = nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
if (nfs_client_return_marked_delegations(clp) != 0)
if (need_wait) {
nfs4_schedule_state_manager(clp);
nfs4_wait_clnt_recover(clp);
}
}
static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
static void nfs_mark_return_unused_delegation_types(struct nfs_server *server,
fmode_t flags)
{
struct nfs_delegation *delegation;
......@@ -512,27 +571,21 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
nfs_mark_return_delegation(server, delegation);
nfs_mark_return_if_closed_delegation(server, delegation);
}
}
static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp,
fmode_t flags)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
nfs_mark_return_all_delegation_types(server, flags);
nfs_mark_return_unused_delegation_types(server, flags);
rcu_read_unlock();
}
static void nfs_delegation_run_state_manager(struct nfs_client *clp)
{
if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
nfs4_schedule_state_manager(clp);
}
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
......@@ -546,27 +599,17 @@ void nfs_remove_bad_delegation(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
/**
* nfs_expire_all_delegation_types
* nfs_expire_unused_delegation_types
* @clp: client to process
* @flags: delegation types to expire
*
*/
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags)
{
nfs_client_mark_return_all_delegation_types(clp, flags);
nfs_client_mark_return_unused_delegation_types(clp, flags);
nfs_delegation_run_state_manager(clp);
}
/**
* nfs_expire_all_delegations
* @clp: client to process
*
*/
void nfs_expire_all_delegations(struct nfs_client *clp)
{
nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
}
static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
......@@ -574,7 +617,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
nfs_mark_return_delegation(server, delegation);
nfs_mark_return_if_closed_delegation(server, delegation);
}
}
......
......@@ -28,6 +28,7 @@ struct nfs_delegation {
enum {
NFS_DELEGATION_NEED_RECLAIM = 0,
NFS_DELEGATION_RETURN,
NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
NFS_DELEGATION_RETURNING,
};
......@@ -41,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
void nfs_expire_all_delegations(struct nfs_client *clp);
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
......@@ -53,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
......
......@@ -1486,6 +1486,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open;
if (d_mountpoint(dentry))
goto no_open;
if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
goto no_open;
inode = dentry->d_inode;
parent = dget_parent(dentry);
......
......@@ -744,6 +744,7 @@ static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
struct nfs_lock_context *l_ctx;
int status;
/*
......@@ -752,6 +753,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
*/
nfs_sync_mapping(filp->f_mapping);
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {
status = nfs_iocounter_wait(&l_ctx->io_count);
nfs_put_lock_context(l_ctx);
if (status < 0)
return status;
}
/* NOTE: special case
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
......
......@@ -726,9 +726,9 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
return ret;
}
static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
{
return key_instantiate_and_link(key, data, strlen(data) + 1,
return key_instantiate_and_link(key, data, datalen,
id_resolver_cache->thread_keyring,
authkey);
}
......@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
struct key *key, struct key *authkey)
{
char id_str[NFS_UINT_MAXLEN];
size_t len;
int ret = -ENOKEY;
/* ret = -ENOKEY */
......@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
case IDMAP_CONV_NAMETOID:
if (strcmp(upcall->im_name, im->im_name) != 0)
break;
sprintf(id_str, "%d", im->im_id);
ret = nfs_idmap_instantiate(key, authkey, id_str);
/* Note: here we store the NUL terminator too */
len = sprintf(id_str, "%d", im->im_id) + 1;
ret = nfs_idmap_instantiate(key, authkey, id_str, len);
break;
case IDMAP_CONV_IDTONAME:
if (upcall->im_id != im->im_id)
break;
ret = nfs_idmap_instantiate(key, authkey, im->im_name);
len = strlen(im->im_name);
ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
break;
default:
ret = -EINVAL;
......
......@@ -561,20 +561,22 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
l_ctx->lockowner.l_owner = current->files;
l_ctx->lockowner.l_pid = current->tgid;
INIT_LIST_HEAD(&l_ctx->list);
nfs_iocounter_init(&l_ctx->io_count);
}
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
{
struct nfs_lock_context *pos;
struct nfs_lock_context *head = &ctx->lock_context;
struct nfs_lock_context *pos = head;
list_for_each_entry(pos, &ctx->lock_context.list, list) {
do {
if (pos->lockowner.l_owner != current->files)
continue;
if (pos->lockowner.l_pid != current->tgid)
continue;
atomic_inc(&pos->count);
return pos;
}
} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
return NULL;
}
......
......@@ -229,6 +229,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
c->flags = 0;
atomic_set(&c->io_count, 0);
}
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
......
......@@ -36,6 +36,7 @@ enum nfs4_client_state {
struct nfs4_minor_version_ops {
u32 minor_version;
unsigned init_caps;
int (*call_sync)(struct rpc_clnt *clnt,
struct nfs_server *server,
......@@ -143,12 +144,14 @@ struct nfs4_lock_state {
enum {
LK_STATE_IN_USE,
NFS_DELEGATED_STATE, /* Current stateid is delegation */
NFS_OPEN_STATE, /* OPEN stateid is set */
NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
};
struct nfs4_state {
......@@ -233,6 +236,10 @@ extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
extern int nfs4_release_lockowner(struct nfs4_lock_state *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
#if defined(CONFIG_NFS_V4_1)
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
......@@ -347,13 +354,13 @@ extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct nfs41_server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
fmode_t, const struct nfs_lockowner *);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
......@@ -412,6 +419,11 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
return memcmp(dst, src, sizeof(*dst)) == 0;
}
static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
{
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
}
#else
#define nfs4_close_state(a, b) do { } while (0)
......
......@@ -198,6 +198,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
if (clp->cl_minorversion != 0)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
error = nfs_create_rpc_client(clp, timeparms, authflavour);
if (error < 0)
......@@ -717,6 +719,19 @@ static int nfs4_server_common_setup(struct nfs_server *server,
if (error < 0)
goto out;
/* Set the basic capabilities */
server->caps |= server->nfs_client->cl_mvops->init_caps;
if (server->flags & NFS_MOUNT_NORDIRPLUS)
server->caps &= ~NFS_CAP_READDIRPLUS;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
*/
if (nfs4_disable_idmapping &&
server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
server->caps |= NFS_CAP_UIDGID_NOMAP;
/* Probe the root fh to retrieve its FSID and filehandle */
error = nfs4_get_rootfh(server, mntfh);
if (error < 0)
......@@ -760,9 +775,6 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
server->caps |= NFS_CAP_READDIRPLUS;
server->options = data->options;
/* Get a client record */
......@@ -779,13 +791,6 @@ static int nfs4_init_server(struct nfs_server *server,
if (error < 0)
goto error;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
*/
if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
server->caps |= NFS_CAP_UIDGID_NOMAP;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
if (data->wsize)
......@@ -863,7 +868,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
/* Get a client representation.
* Note: NFSv4 always uses TCP, */
......
......@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
{
if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return;
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
pnfs_return_layout(inode);
}
......@@ -159,11 +158,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
nfs4_schedule_stateid_recovery(mds_server, state);
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL)
nfs4_schedule_stateid_recovery(mds_server, state);
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
......@@ -227,6 +229,9 @@ static int filelayout_async_handle_error(struct rpc_task *task,
out:
task->tk_status = 0;
return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
......@@ -300,6 +305,10 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
struct nfs_read_data *rdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
if (filelayout_reset_to_mds(rdata->header->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_read(rdata);
......@@ -308,10 +317,13 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
}
rdata->read_done_cb = filelayout_read_done_cb;
nfs41_setup_sequence(rdata->ds_clp->cl_session,
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args,
&rdata->res.seq_res,
task);
task))
return;
nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
rdata->args.lock_context, FMODE_READ);
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
......@@ -402,16 +414,23 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
struct nfs_write_data *wdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
if (filelayout_reset_to_mds(wdata->header->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_write(wdata);
rpc_exit(task, 0);
return;
}
nfs41_setup_sequence(wdata->ds_clp->cl_session,
if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args,
&wdata->res.seq_res,
task);
task))
return;
nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
wdata->args.lock_context, FMODE_WRITE);
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
......
This diff is collapsed.
......@@ -699,6 +699,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
list_for_each_entry(state, &nfsi->open_states, inode_states) {
if (state->owner != owner)
continue;
if (!nfs4_valid_open_stateid(state))
continue;
if (atomic_inc_not_zero(&state->count))
return state;
}
......@@ -987,13 +989,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
return 0;
}
static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
struct nfs4_state *state,
const struct nfs_lockowner *lockowner)
{
struct nfs4_lock_state *lsp;
fl_owner_t fl_owner;
pid_t fl_pid;
bool ret = false;
int ret = -ENOENT;
if (lockowner == NULL)
......@@ -1008,7 +1011,10 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
nfs4_stateid_copy(dst, &lsp->ls_stateid);
ret = true;
ret = 0;
smp_rmb();
if (!list_empty(&lsp->ls_seqid.list))
ret = -EWOULDBLOCK;
}
spin_unlock(&state->state_lock);
nfs4_put_lock_state(lsp);
......@@ -1016,28 +1022,44 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
return ret;
}
static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
{
const nfs4_stateid *src;
int ret;
int seq;
do {
src = &zero_stateid;
seq = read_seqbegin(&state->seqlock);
nfs4_stateid_copy(dst, &state->stateid);
if (test_bit(NFS_OPEN_STATE, &state->flags))
src = &state->open_stateid;
nfs4_stateid_copy(dst, src);
ret = 0;
smp_rmb();
if (!list_empty(&state->owner->so_seqid.list))
ret = -EWOULDBLOCK;
} while (read_seqretry(&state->seqlock, seq));
return ret;
}
/*
* Byte-range lock aware utility to initialize the stateid of read/write
* requests.
*/
void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
fmode_t fmode, const struct nfs_lockowner *lockowner)
{
int ret = 0;
if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
return;
if (nfs4_copy_lock_stateid(dst, state, lockowner))
return;
nfs4_copy_open_stateid(dst, state);
goto out;
ret = nfs4_copy_lock_stateid(dst, state, lockowner);
if (ret != -ENOENT)
goto out;
ret = nfs4_copy_open_stateid(dst, state);
out:
if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
dst->seqid = 0;
return ret;
}
struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
......@@ -1286,14 +1308,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s
return 1;
}
void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
{
struct nfs_client *clp = server->nfs_client;
if (!nfs4_valid_open_stateid(state))
return -EBADF;
nfs4_state_mark_reclaim_nograce(clp, state);
dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
clp->cl_hostname);
nfs4_schedule_state_manager(clp);
return 0;
}
EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
......@@ -1323,6 +1348,27 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
nfs4_schedule_state_manager(clp);
}
static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
spin_lock(&inode->i_lock);
list_for_each_entry(ctx, &nfsi->open_files, list) {
if (ctx->state != state)
continue;
set_bit(NFS_CONTEXT_BAD, &ctx->flags);
}
spin_unlock(&inode->i_lock);
}
static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
{
set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
nfs4_state_mark_open_context_bad(state);
}
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
......@@ -1398,6 +1444,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
continue;
if (!nfs4_valid_open_stateid(state))
continue;
if (state->state == 0)
continue;
atomic_inc(&state->count);
......@@ -1430,11 +1478,10 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
*/
memset(&state->stateid, 0,
sizeof(state->stateid));
/* Mark the file as being 'closed' */
state->state = 0;
nfs4_state_mark_recovery_failed(state, status);
break;
case -EAGAIN:
ssleep(1);
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
......@@ -1696,6 +1743,10 @@ static int nfs4_check_lease(struct nfs_client *clp)
}
status = ops->renew_lease(clp, cred);
put_rpccred(cred);
if (status == -ETIMEDOUT) {
set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
return 0;
}
out:
return nfs4_recovery_handle_error(clp, status);
}
......
......@@ -1058,8 +1058,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
if (iap->ia_valid & ATTR_ATIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(iap->ia_atime.tv_sec);
p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
*p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
......@@ -1069,8 +1068,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
if (iap->ia_valid & ATTR_MTIME_SET) {
bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
*p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
}
else if (iap->ia_valid & ATTR_MTIME) {
......@@ -1366,34 +1364,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
{
struct iattr dummy;
__be32 *p;
struct nfs_client *clp;
p = reserve_space(xdr, 4);
switch(arg->open_flags & O_EXCL) {
case 0:
switch(arg->createmode) {
case NFS4_CREATE_UNCHECKED:
*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
encode_attrs(xdr, arg->u.attrs, arg->server);
break;
default:
clp = arg->server->nfs_client;
if (clp->cl_mvops->minor_version > 0) {
if (nfs4_has_persistent_session(clp)) {
case NFS4_CREATE_GUARDED:
*p = cpu_to_be32(NFS4_CREATE_GUARDED);
encode_attrs(xdr, arg->u.attrs, arg->server);
} else {
struct iattr dummy;
break;
case NFS4_CREATE_EXCLUSIVE:
*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
encode_nfs4_verifier(xdr, &arg->u.verifier);
break;
case NFS4_CREATE_EXCLUSIVE4_1:
*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
encode_nfs4_verifier(xdr, &arg->u.verifier);
dummy.ia_valid = 0;
encode_attrs(xdr, &dummy, arg->server);
}
} else {
*p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
encode_nfs4_verifier(xdr, &arg->u.verifier);
}
}
}
static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg)
......@@ -1459,6 +1452,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
encode_string(xdr, name->len, name->name);
}
static inline void encode_claim_fh(struct xdr_stream *xdr)
{
__be32 *p;
p = reserve_space(xdr, 4);
*p = cpu_to_be32(NFS4_OPEN_CLAIM_FH);
}
static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid)
{
__be32 *p;
p = reserve_space(xdr, 4);
*p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH);
encode_nfs4_stateid(xdr, stateid);
}
static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
......@@ -1474,6 +1484,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
break;
case NFS4_OPEN_CLAIM_FH:
encode_claim_fh(xdr);
break;
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
encode_claim_delegate_cur_fh(xdr, &arg->u.delegation);
break;
default:
BUG();
}
......@@ -1506,35 +1522,12 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
}
static void encode_open_stateid(struct xdr_stream *xdr,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode,
int zero_seqid)
{
nfs4_stateid stateid;
if (ctx->state != NULL) {
const struct nfs_lockowner *lockowner = NULL;
if (l_ctx != NULL)
lockowner = &l_ctx->lockowner;
nfs4_select_rw_stateid(&stateid, ctx->state,
fmode, lockowner);
if (zero_seqid)
stateid.seqid = 0;
encode_nfs4_stateid(xdr, &stateid);
} else
encode_nfs4_stateid(xdr, &zero_stateid);
}
static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
encode_open_stateid(xdr, args->context, args->lock_context,
FMODE_READ, hdr->minorversion);
encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
......@@ -1670,8 +1663,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
__be32 *p;
encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
encode_open_stateid(xdr, args->context, args->lock_context,
FMODE_WRITE, hdr->minorversion);
encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
......@@ -3497,8 +3489,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
if (n == 0)
goto root_path;
dprintk("pathname4: ");
path->ncomponents = 0;
while (path->ncomponents < n) {
if (n > NFS4_PATHNAME_MAXCOMPONENTS) {
dprintk("cannot parse %d components in path\n", n);
goto out_eio;
}
for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) {
struct nfs4_string *component = &path->components[path->ncomponents];
status = decode_opaque_inline(xdr, &component->len, &component->data);
if (unlikely(status != 0))
......@@ -3507,12 +3502,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
pr_cont("%s%.*s ",
(path->ncomponents != n ? "/ " : ""),
component->len, component->data);
if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
path->ncomponents++;
else {
dprintk("cannot parse %d components in path\n", n);
goto out_eio;
}
}
out:
return status;
......@@ -3557,27 +3546,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
n = be32_to_cpup(p);
if (n <= 0)
goto out_eio;
res->nlocations = 0;
while (res->nlocations < n) {
for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
struct nfs4_fs_location *loc = &res->locations[res->nlocations];
struct nfs4_fs_location *loc;
if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES)
break;
loc = &res->locations[res->nlocations];
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
m = be32_to_cpup(p);
loc->nservers = 0;
dprintk("%s: servers:\n", __func__);
while (loc->nservers < m) {
struct nfs4_string *server = &loc->servers[loc->nservers];
status = decode_opaque_inline(xdr, &server->len, &server->data);
if (unlikely(status != 0))
goto out_eio;
dprintk("%s ", server->data);
if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
loc->nservers++;
else {
for (loc->nservers = 0; loc->nservers < m; loc->nservers++) {
struct nfs4_string *server;
if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) {
unsigned int i;
dprintk("%s: using first %u of %u servers "
"returned for location %u\n",
......@@ -3591,13 +3576,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (unlikely(status != 0))
goto out_eio;
}
break;
}
server = &loc->servers[loc->nservers];
status = decode_opaque_inline(xdr, &server->len, &server->data);
if (unlikely(status != 0))
goto out_eio;
dprintk("%s ", server->data);
}
status = decode_pathname(xdr, &loc->rootpath);
if (unlikely(status != 0))
goto out_eio;
if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
res->nlocations++;
}
if (res->nlocations != 0)
status = NFS_ATTR_FATTR_V4_LOCATIONS;
......
......@@ -84,6 +84,55 @@ nfs_page_free(struct nfs_page *p)
kmem_cache_free(nfs_page_cachep, p);
}
static void
nfs_iocounter_inc(struct nfs_io_counter *c)
{
atomic_inc(&c->io_count);
}
static void
nfs_iocounter_dec(struct nfs_io_counter *c)
{
if (atomic_dec_and_test(&c->io_count)) {
clear_bit(NFS_IO_INPROGRESS, &c->flags);
smp_mb__after_clear_bit();
wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
}
}
static int
__nfs_iocounter_wait(struct nfs_io_counter *c)
{
wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
int ret = 0;
do {
prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
ret = nfs_wait_bit_killable(&c->flags);
} while (atomic_read(&c->io_count) != 0);
finish_wait(wq, &q.wait);
return ret;
}
/**
* nfs_iocounter_wait - wait for i/o to complete
* @c: nfs_io_counter to use
*
* returns -ERESTARTSYS if interrupted by a fatal signal.
* Otherwise returns 0 once the io_count hits 0.
*/
int
nfs_iocounter_wait(struct nfs_io_counter *c)
{
if (atomic_read(&c->io_count) == 0)
return 0;
return __nfs_iocounter_wait(c);
}
/**
* nfs_create_request - Create an NFS read/write request.
* @ctx: open context to use
......@@ -104,6 +153,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
struct nfs_page *req;
struct nfs_lock_context *l_ctx;
if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
return ERR_PTR(-EBADF);
/* try to allocate the request struct */
req = nfs_page_alloc();
if (req == NULL)
......@@ -116,6 +167,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
return ERR_CAST(l_ctx);
}
req->wb_lock_context = l_ctx;
nfs_iocounter_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
......@@ -175,6 +227,7 @@ static void nfs_clear_request(struct nfs_page *req)
req->wb_page = NULL;
}
if (l_ctx != NULL) {
nfs_iocounter_dec(&l_ctx->io_count);
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
......
......@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
lo_seg_intersecting(lseg_range, recall_range);
}
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
{
if (!atomic_dec_and_test(&lseg->pls_refcount))
return false;
pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
list_add(&lseg->pls_list, tmp_list);
return true;
}
/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
......@@ -430,12 +440,9 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
if (atomic_dec_and_test(&lseg->pls_refcount)) {
pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
list_add(&lseg->pls_list, tmp_list);
if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
}
}
return rv;
}
......@@ -711,6 +718,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
spin_lock(&lo->plh_inode->i_lock);
if (pnfs_layoutgets_blocked(lo, 1)) {
status = -EAGAIN;
} else if (!nfs4_valid_open_stateid(open_state)) {
status = -EBADF;
} else if (list_empty(&lo->plh_segs)) {
int seq;
......@@ -777,6 +786,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}
static void pnfs_clear_layoutcommit(struct inode *inode,
struct list_head *head)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct pnfs_layout_segment *lseg, *tmp;
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
return;
list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
continue;
pnfs_lseg_dec_and_remove_zero(lseg, head);
}
}
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
......@@ -808,6 +832,7 @@ _pnfs_return_layout(struct inode *ino)
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
......@@ -820,8 +845,6 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
......@@ -845,6 +868,33 @@ _pnfs_return_layout(struct inode *ino)
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);
int
pnfs_commit_and_return_layout(struct inode *inode)
{
struct pnfs_layout_hdr *lo;
int ret;
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
if (lo == NULL) {
spin_unlock(&inode->i_lock);
return 0;
}
pnfs_get_layout_hdr(lo);
/* Block new layoutgets and read/write to ds */
lo->plh_block_lgets++;
spin_unlock(&inode->i_lock);
filemap_fdatawait(inode->i_mapping);
ret = pnfs_layoutcommit_inode(inode, true);
if (ret == 0)
ret = _pnfs_return_layout(inode);
spin_lock(&inode->i_lock);
lo->plh_block_lgets--;
spin_unlock(&inode->i_lock);
pnfs_put_layout_hdr(lo);
return ret;
}
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
......@@ -1458,7 +1508,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
......@@ -1613,7 +1662,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
......@@ -1746,11 +1794,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
if (lseg->pls_range.iomode == IOMODE_RW &&
test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
list_add(&lseg->pls_lc_list, listp);
}
}
static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
struct pnfs_layout_segment *lseg, *tmp;
unsigned long *bitlock = &NFS_I(inode)->flags;
/* Matched by references in pnfs_set_layoutcommit */
list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
list_del_init(&lseg->pls_lc_list);
pnfs_put_lseg(lseg);
}
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
......@@ -1795,6 +1859,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}
/*
......
......@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_write_data *);
void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
......@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
static inline int pnfs_commit_and_return_layout(struct inode *inode)
{
return 0;
}
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
......
......@@ -514,6 +514,8 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
rpc_exit(task, -EIO);
}
static const struct rpc_call_ops nfs_read_common_ops = {
......
......@@ -1251,6 +1251,8 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
rpc_exit(task, -EIO);
}
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
......
......@@ -59,11 +59,18 @@ struct nfs_lockowner {
pid_t l_pid;
};
#define NFS_IO_INPROGRESS 0
struct nfs_io_counter {
unsigned long flags;
atomic_t io_count;
};
struct nfs_lock_context {
atomic_t count;
struct list_head list;
struct nfs_open_context *open_context;
struct nfs_lockowner lockowner;
struct nfs_io_counter io_count;
};
struct nfs4_state;
......@@ -77,6 +84,7 @@ struct nfs_open_context {
unsigned long flags;
#define NFS_CONTEXT_ERROR_WRITE (0)
#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
int error;
struct list_head list;
......
......@@ -40,6 +40,7 @@ struct nfs_client {
#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
#define NFS_CS_MIGRATION 2 /* - transparent state migr */
#define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
......@@ -197,5 +198,7 @@ struct nfs_server {
#define NFS_CAP_MTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
#define NFS_CAP_ATOMIC_OPEN_V1 (1U << 17)
#endif
......@@ -349,6 +349,7 @@ struct nfs_openargs {
const u32 * bitmask;
const u32 * open_bitmap;
__u32 claim;
enum createmode4 createmode;
};
struct nfs_openres {
......@@ -486,6 +487,7 @@ struct nfs_readargs {
struct nfs_fh * fh;
struct nfs_open_context *context;
struct nfs_lock_context *lock_context;
nfs4_stateid stateid;
__u64 offset;
__u32 count;
unsigned int pgbase;
......@@ -507,6 +509,7 @@ struct nfs_writeargs {
struct nfs_fh * fh;
struct nfs_open_context *context;
struct nfs_lock_context *lock_context;
nfs4_stateid stateid;
__u64 offset;
__u32 count;
enum nfs3_stable_how stable;
......
......@@ -124,6 +124,7 @@ struct rpc_create_args {
#define RPC_CLNT_CREATE_NOPING (1UL << 4)
#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5)
#define RPC_CLNT_CREATE_QUIET (1UL << 6)
#define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
......
......@@ -255,6 +255,8 @@ static inline int bc_prealloc(struct rpc_rqst *req)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#define XPRT_CREATE_INFINITE_SLOTS (1U)
struct xprt_create {
int ident; /* XPRT_TRANSPORT identifier */
struct net * net;
......@@ -263,6 +265,7 @@ struct xprt_create {
size_t addrlen;
const char *servername;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
unsigned int flags;
};
struct xprt_class {
......@@ -279,6 +282,7 @@ struct xprt_class {
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
void xprt_retry_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
......@@ -334,6 +338,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable);
#define XPRT_CLOSING (6)
#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_CONGESTED (9)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
......
......@@ -24,7 +24,6 @@ config SUNRPC_XPRT_RDMA
config SUNRPC_SWAP
bool
depends on SUNRPC
select NETVM
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
......
......@@ -414,6 +414,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
};
char servername[48];
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
/*
* If the caller chooses not to specify a hostname, whip
* up a string representation of the passed-in address.
......@@ -1306,6 +1308,8 @@ call_reserve(struct rpc_task *task)
xprt_reserve(task);
}
static void call_retry_reserve(struct rpc_task *task);
/*
* 1b. Grok the result of xprt_reserve()
*/
......@@ -1347,7 +1351,7 @@ call_reserveresult(struct rpc_task *task)
case -ENOMEM:
rpc_delay(task, HZ >> 2);
case -EAGAIN: /* woken up; retry */
task->tk_action = call_reserve;
task->tk_action = call_retry_reserve;
return;
case -EIO: /* probably a shutdown */
break;
......@@ -1359,6 +1363,19 @@ call_reserveresult(struct rpc_task *task)
rpc_exit(task, status);
}
/*
* 1c. Retry reserving an RPC call slot
*/
static void
call_retry_reserve(struct rpc_task *task)
{
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
}
/*
* 2. Bind and/or refresh the credentials
*/
......@@ -1644,22 +1661,26 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
if (status >= 0 || status == -EAGAIN) {
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
trace_rpc_connect_status(task, status);
switch (status) {
/* if soft mounted, test if we've timed out */
case -ETIMEDOUT:
task->tk_action = call_timeout;
return;
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
if (RPC_IS_SOFTCONN(task))
break;
default:
rpc_exit(task, -EIO);
/* retry with existing socket, after a delay */
case 0:
case -EAGAIN:
task->tk_status = 0;
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
rpc_exit(task, status);
}
/*
......
......@@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_waitqueue = queue;
queue->qlen++;
/* barrier matches the read in rpc_wake_up_task_queue_locked() */
smp_wmb();
rpc_set_queued(task);
dprintk("RPC: %5u added to queue %p \"%s\"\n",
......@@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
*/
static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue)
if (RPC_IS_QUEUED(task)) {
smp_rmb();
if (task->tk_waitqueue == queue)
__rpc_do_wake_up_task(queue, task);
}
}
/*
......
......@@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
{
set_bit(XPRT_CONGESTED, &xprt->state);
rpc_sleep_on(&xprt->backlog, task, NULL);
}
static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
{
if (rpc_wake_up_next(&xprt->backlog) == NULL)
clear_bit(XPRT_CONGESTED, &xprt->state);
}
static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
{
bool ret = false;
if (!test_bit(XPRT_CONGESTED, &xprt->state))
goto out;
spin_lock(&xprt->reserve_lock);
if (test_bit(XPRT_CONGESTED, &xprt->state)) {
rpc_sleep_on(&xprt->backlog, task, NULL);
ret = true;
}
spin_unlock(&xprt->reserve_lock);
out:
return ret;
}
static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
......@@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
task->tk_status = -ENOMEM;
break;
case -EAGAIN:
rpc_sleep_on(&xprt->backlog, task, NULL);
xprt_add_backlog(xprt, task);
dprintk("RPC: waiting for request slot\n");
default:
task->tk_status = -EAGAIN;
......@@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
memset(req, 0, sizeof(*req)); /* mark unused */
list_add(&req->rq_list, &xprt->free);
}
rpc_wake_up_next(&xprt->backlog);
xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock);
}
......@@ -1092,13 +1120,40 @@ EXPORT_SYMBOL_GPL(xprt_free);
* xprt_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
*
* If no more slots are available, place the task on the transport's
* If the transport is marked as being congested, or if no more
* slots are available, place the task on the transport's
* backlog queue.
*/
void xprt_reserve(struct rpc_task *task)
{
struct rpc_xprt *xprt;
task->tk_status = 0;
if (task->tk_rqstp != NULL)
return;
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
rcu_read_lock();
xprt = rcu_dereference(task->tk_client->cl_xprt);
if (!xprt_throttle_congested(xprt, task))
xprt->ops->alloc_slot(xprt, task);
rcu_read_unlock();
}
/**
* xprt_retry_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
*
* If no more slots are available, place the task on the transport's
* backlog queue.
* Note that the only difference with xprt_reserve is that we now
* ignore the value of the XPRT_CONGESTED flag.
*/
void xprt_retry_reserve(struct rpc_task *task)
{
struct rpc_xprt *xprt;
task->tk_status = 0;
if (task->tk_rqstp != NULL)
return;
......
......@@ -2207,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
*/
xs_tcp_force_close(xprt);
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
/* retry with existing socket, after a delay */
case 0:
case -EINPROGRESS:
case -EALREADY:
......@@ -2221,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work)
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
*/
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
/* retry with existing socket, after a delay */
goto out;
}
out_eagain:
......@@ -2767,9 +2767,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
struct rpc_xprt *xprt;
struct sock_xprt *transport;
struct rpc_xprt *ret;
unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
xprt_max_tcp_slot_table_entries);
max_slot_table_size);
if (IS_ERR(xprt))
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment