Commit 191f7912 authored by Trond Myklebust

NFSv4: Clean up the reboot recovery.

Ensure that we exclude stateful operations by using a per-server read/write semaphore.
Signed-off-by: Trond Myklebust <trond.myklebust@fys.uio.no>
parent b6b42cec
......@@ -1510,8 +1510,13 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
nfs_idmap_new(clp);
}
if (list_empty(&clp->cl_superblocks))
clear_bit(NFS4CLNT_OK, &clp->cl_state);
if (list_empty(&clp->cl_superblocks)) {
err = nfs4_init_client(clp);
if (err != 0) {
up_write(&clp->cl_sem);
goto out_fail;
}
}
list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
clnt = rpc_clone_client(clp->cl_rpcclient);
if (!IS_ERR(clnt))
......
......@@ -254,6 +254,7 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_client *clp = server->nfs4_state;
struct inode *inode = NULL;
int status;
struct nfs_fattr f_attr = {
......@@ -279,6 +280,8 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
.rpc_cred = cred,
};
/* Protect against reboot recovery conflicts */
down_read(&clp->cl_sem);
status = -ENOMEM;
if (!(sp = nfs4_get_state_owner(server, cred))) {
dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
......@@ -342,15 +345,18 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
up(&sp->so_sema);
nfs4_put_state_owner(sp);
up_read(&clp->cl_sem);
*res = state;
return 0;
out_err:
if (sp != NULL) {
if (state != NULL)
nfs4_put_open_state(state);
up(&sp->so_sema);
nfs4_put_state_owner(sp);
}
if (state != NULL)
nfs4_put_open_state(state);
/* Note: clp->cl_sem must still be held across nfs4_put_open_state(); release it only afterwards! */
up_read(&clp->cl_sem);
if (inode != NULL)
iput(inode);
*res = NULL;
......@@ -446,7 +452,7 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
*
* NOTE: Caller must be holding the sp->so_sema semaphore!
*/
int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
......@@ -475,7 +481,27 @@ int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
return status;
}
int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
/*
 * Wrapper around _nfs4_do_close() that retries for as long as
 * nfs4_handle_exception() asks for a retry.
 *
 * A stale or expired stateid schedules state recovery on the client and is
 * then treated as success. state->state is reset to 0 after every attempt,
 * whatever its outcome, before the exception handler is consulted.
 *
 * Returns the value last produced by nfs4_handle_exception().
 */
int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
	struct nfs_server *server = NFS_SERVER(state->inode);
	struct nfs4_exception exception = { };
	int status;

	do {
		status = _nfs4_do_close(inode, state);
		if (status == -NFS4ERR_STALE_STATEID ||
		    status == -NFS4ERR_EXPIRED) {
			/* Hand the problem to the recovery code and carry on. */
			nfs4_schedule_state_recovery(server->nfs4_state);
			status = 0;
		}
		/* Done unconditionally, for every result of the attempt. */
		state->state = 0;
		status = nfs4_handle_exception(server, status, &exception);
	} while (exception.retry);

	return status;
}
static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
......@@ -500,6 +526,26 @@ int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mod
return status;
}
/*
 * Wrapper around _nfs4_do_downgrade() that retries for as long as
 * nfs4_handle_exception() asks for a retry.
 *
 * A stale or expired stateid schedules state recovery on the client and is
 * then treated as success. state->state is set to the requested mode after
 * every attempt, whatever its outcome, before the exception handler is
 * consulted.
 *
 * Returns the value last produced by nfs4_handle_exception().
 */
int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
	struct nfs_server *server = NFS_SERVER(state->inode);
	struct nfs4_exception exception = { };
	int status;

	do {
		status = _nfs4_do_downgrade(inode, state, mode);
		if (status == -NFS4ERR_STALE_STATEID ||
		    status == -NFS4ERR_EXPIRED) {
			/* Hand the problem to the recovery code and carry on. */
			nfs4_schedule_state_recovery(server->nfs4_state);
			status = 0;
		}
		/* Done unconditionally, for every result of the attempt. */
		state->state = mode;
		status = nfs4_handle_exception(server, status, &exception);
	} while (exception.retry);

	return status;
}
struct inode *
nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
......@@ -1829,6 +1875,8 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
case -NFS4ERR_EXPIRED:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_OK, &clp->cl_state))
rpc_wake_up_task(task);
task->tk_status = 0;
return -EAGAIN;
case -NFS4ERR_GRACE:
......@@ -1844,12 +1892,11 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
return 0;
}
int
nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
{
DEFINE_WAIT(wait);
sigset_t oldset;
int interruptible, res;
int interruptible, res = 0;
might_sleep();
......@@ -1857,19 +1904,12 @@ nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
interruptible = TASK_UNINTERRUPTIBLE;
if (clnt->cl_intr)
interruptible = TASK_INTERRUPTIBLE;
do {
res = 0;
prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_OK, &clp->cl_state) &&
!test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state))
break;
if (clnt->cl_intr && signalled()) {
res = -ERESTARTSYS;
break;
}
prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
nfs4_schedule_state_recovery(clp);
if (clnt->cl_intr && signalled())
res = -ERESTARTSYS;
else if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
schedule();
} while(!test_bit(NFS4CLNT_OK, &clp->cl_state));
finish_wait(&clp->cl_waitq, &wait);
rpc_clnt_sigunmask(clnt, &oldset);
return res;
......@@ -2072,6 +2112,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
struct nfs4_lock_state *lsp;
int status;
down_read(&clp->cl_sem);
nlo.clientid = clp->cl_clientid;
down(&state->lock_sema);
lsp = nfs4_find_lock_state(state, request->fl_owner);
......@@ -2105,6 +2146,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
if (lsp)
nfs4_put_lock_state(lsp);
up(&state->lock_sema);
up_read(&clp->cl_sem);
return status;
}
......@@ -2125,6 +2167,7 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
{
struct inode *inode = state->inode;
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_client *clp = server->nfs4_state;
struct nfs_lockargs arg = {
.fh = NFS_FH(inode),
.type = nfs4_lck_type(cmd, request),
......@@ -2144,6 +2187,7 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
struct nfs_locku_opargs luargs;
int status = 0;
down_read(&clp->cl_sem);
down(&state->lock_sema);
lsp = nfs4_find_lock_state(state, request->fl_owner);
if (!lsp)
......@@ -2164,6 +2208,7 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
up(&state->lock_sema);
if (status == 0)
posix_lock_file(request->fl_file, request);
up_read(&clp->cl_sem);
return status;
}
......@@ -2184,6 +2229,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct inode *inode = state->inode;
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_client *clp = server->nfs4_state;
struct nfs4_lock_state *lsp;
struct nfs_lockargs arg = {
.fh = NFS_FH(inode),
......@@ -2205,6 +2251,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
};
int status;
down_read(&clp->cl_sem);
down(&state->lock_sema);
lsp = nfs4_find_lock_state(state, request->fl_owner);
if (lsp == NULL) {
......@@ -2258,6 +2305,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (posix_lock_file_wait(request->fl_file, request) < 0)
printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
}
up_read(&clp->cl_sem);
return status;
}
......
......@@ -106,7 +106,7 @@ nfs4_alloc_client(struct in_addr *addr)
INIT_LIST_HEAD(&clp->cl_superblocks);
init_waitqueue_head(&clp->cl_waitq);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
clp->cl_state = 1 << NFS4CLNT_NEW;
clp->cl_state = 1 << NFS4CLNT_OK;
}
return clp;
}
......@@ -169,6 +169,16 @@ nfs4_put_client(struct nfs4_client *clp)
nfs4_free_client(clp);
}
/*
 * Set up the client: call nfs4_proc_setclientid(), then
 * nfs4_proc_setclientid_confirm(), and once both have succeeded
 * schedule state renewal.
 *
 * Returns 0 on success, otherwise the status of the first call that failed
 * (in which case the later steps are skipped).
 */
int nfs4_init_client(struct nfs4_client *clp)
{
	int status;

	status = nfs4_proc_setclientid(clp, 0, 0);
	if (status != 0)
		return status;

	status = nfs4_proc_setclientid_confirm(clp);
	if (status != 0)
		return status;

	nfs4_schedule_state_renewal(clp);
	return 0;
}
u32
nfs4_alloc_lockowner_id(struct nfs4_client *clp)
{
......@@ -185,7 +195,6 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
atomic_inc(&sp->so_count);
sp->so_cred = cred;
list_move(&sp->so_list, &clp->cl_state_owners);
sp->so_generation = clp->cl_generation;
clp->cl_nunused--;
}
return sp;
......@@ -237,8 +246,11 @@ nfs4_unhash_state_owner(struct nfs4_state_owner *sp)
spin_unlock(&clp->cl_lock);
}
struct nfs4_state_owner *
nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
/*
* Note: must be called with clp->cl_sem held in order to prevent races
* with reboot recovery!
*/
struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
{
struct nfs4_client *clp = server->nfs4_state;
struct nfs4_state_owner *sp, *new;
......@@ -254,23 +266,23 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
new->so_client = clp;
new->so_id = nfs4_alloc_lockowner_id(clp);
new->so_cred = cred;
new->so_generation = clp->cl_generation;
sp = new;
new = NULL;
}
spin_unlock(&clp->cl_lock);
if (new)
kfree(new);
if (sp) {
if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
nfs4_wait_clnt_recover(server->client, clp);
} else
put_rpccred(cred);
return sp;
if (sp != NULL)
return sp;
put_rpccred(cred);
return NULL;
}
void
nfs4_put_state_owner(struct nfs4_state_owner *sp)
/*
* Must be called with clp->cl_sem held in order to avoid races
* with state recovery...
*/
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
struct nfs4_client *clp = sp->so_client;
struct rpc_cred *cred = sp->so_cred;
......@@ -330,8 +342,6 @@ __nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
continue;
if ((state->state & mode) != mode)
continue;
/* Add the state to the head of the inode's list */
list_move(&state->inode_states, &nfsi->open_states);
atomic_inc(&state->count);
if (mode & FMODE_READ)
state->nreaders++;
......@@ -353,8 +363,6 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
if (state->nreaders == 0 && state->nwriters == 0)
continue;
if (state->owner == owner) {
/* Add the state to the head of the inode's list */
list_move(&state->inode_states, &nfsi->open_states);
atomic_inc(&state->count);
return state;
}
......@@ -411,55 +419,40 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
return state;
}
static void
__nfs4_put_open_state(struct nfs4_state *state)
/*
* Beware! Caller must be holding exactly one
* reference to clp->cl_sem and owner->so_sema!
*/
void nfs4_put_open_state(struct nfs4_state *state)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
struct nfs4_exception exception = { };
int status = 0;
if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) {
up(&owner->so_sema);
if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
return;
}
if (!list_empty(&state->inode_states))
list_del(&state->inode_states);
spin_unlock(&inode->i_lock);
list_del(&state->open_states);
if (state->state != 0) {
for (;;) {
status = _nfs4_do_close(inode, state);
up(&owner->so_sema);
if (!status)
break;
status = nfs4_handle_exception(NFS_SERVER(inode), status, &exception);
if (!exception.retry)
break;
down(&owner->so_sema);
}
} else
up(&owner->so_sema);
BUG_ON (state->state != 0);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
}
void
nfs4_put_open_state(struct nfs4_state *state)
{
down(&state->owner->so_sema);
__nfs4_put_open_state(state);
}
void
nfs4_close_state(struct nfs4_state *state, mode_t mode)
/*
* Beware! Caller must be holding no references to clp->cl_sem
* or owner->so_sema!
*/
void nfs4_close_state(struct nfs4_state *state, mode_t mode)
{
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
struct nfs4_exception exception = { };
struct nfs4_client *clp = owner->so_client;
int newstate;
int status = 0;
atomic_inc(&owner->so_count);
down_read(&clp->cl_sem);
down(&owner->so_sema);
/* Protect against nfs4_find_state() */
spin_lock(&inode->i_lock);
......@@ -470,29 +463,24 @@ nfs4_close_state(struct nfs4_state *state, mode_t mode)
if (state->nwriters == 0 && state->nreaders == 0)
list_del_init(&state->inode_states);
spin_unlock(&inode->i_lock);
do {
newstate = 0;
if (state->state == 0)
break;
newstate = 0;
if (state->state != 0) {
if (state->nreaders)
newstate |= FMODE_READ;
if (state->nwriters)
newstate |= FMODE_WRITE;
if (state->state == newstate)
break;
goto out;
if (newstate != 0)
status = _nfs4_do_downgrade(inode, state, newstate);
status = nfs4_do_downgrade(inode, state, newstate);
else
status = _nfs4_do_close(inode, state);
if (!status) {
state->state = newstate;
break;
}
up(&owner->so_sema);
status = nfs4_handle_exception(NFS_SERVER(inode), status, &exception);
down(&owner->so_sema);
} while (exception.retry);
__nfs4_put_open_state(state);
status = nfs4_do_close(inode, state);
}
out:
nfs4_put_open_state(state);
up(&owner->so_sema);
nfs4_put_state_owner(owner);
up_read(&clp->cl_sem);
}
/*
......@@ -571,10 +559,9 @@ nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_own
}
/*
* Called with state->lock_sema held.
* Called with state->lock_sema and clp->cl_sem held.
*/
void
nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
{
if (status == NFS_OK || seqid_mutating_err(-status))
lsp->ls_seqid++;
......@@ -661,14 +648,13 @@ nfs4_put_lock_state(struct nfs4_lock_state *lsp)
}
/*
* Called with sp->so_sema held.
* Called with sp->so_sema and clp->cl_sem held.
*
* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
* failed with a seqid incrementing error -
* see comments nfs_fs.h:seqid_mutating_err()
*/
void
nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
{
if (status == NFS_OK || seqid_mutating_err(-status))
sp->so_seqid++;
......@@ -697,21 +683,14 @@ nfs4_recover_state(void *data)
init_completion(&args.complete);
down_read(&clp->cl_sem);
if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state))
goto out_failed;
if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
goto out_failed_clear;
wait_for_completion(&args.complete);
return;
out_failed_clear:
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state);
smp_mb__after_clear_bit();
set_bit(NFS4CLNT_OK, &clp->cl_state);
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
out_failed:
up_read(&clp->cl_sem);
}
/*
......@@ -722,10 +701,8 @@ nfs4_schedule_state_recovery(struct nfs4_client *clp)
{
if (!clp)
return;
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_OK, &clp->cl_state);
smp_mb__after_clear_bit();
schedule_work(&clp->cl_recoverd);
if (test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
schedule_work(&clp->cl_recoverd);
}
static int
......@@ -766,75 +743,50 @@ nfs4_reclaim_open_state(struct nfs4_state_owner *sp)
return status;
}
static int
reclaimer(void *ptr)
static int reclaimer(void *ptr)
{
struct reclaimer_args *args = (struct reclaimer_args *)ptr;
struct nfs4_client *clp = args->clp;
struct nfs4_state_owner *sp;
int generation;
int status;
daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
allow_signal(SIGKILL);
atomic_inc(&clp->cl_count);
complete(&args->complete);
/* Ensure exclusive access to NFSv4 state */
down_write(&clp->cl_sem);
/* Are there any NFS mounts out there? */
if (list_empty(&clp->cl_superblocks))
goto out;
if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) {
status = nfs4_proc_renew(clp);
if (status == 0) {
set_bit(NFS4CLNT_OK, &clp->cl_state);
goto out;
}
}
status = nfs4_proc_setclientid(clp, 0, 0);
if (status)
goto out_error;
status = nfs4_proc_setclientid_confirm(clp);
restart_loop:
status = nfs4_proc_renew(clp);
if (status == 0)
goto out;
status = nfs4_init_client(clp);
if (status)
goto out_error;
generation = ++(clp->cl_generation);
clear_bit(NFS4CLNT_NEW, &clp->cl_state);
set_bit(NFS4CLNT_OK, &clp->cl_state);
up_read(&clp->cl_sem);
nfs4_schedule_state_renewal(clp);
restart_loop:
spin_lock(&clp->cl_lock);
/* Note: list is protected by exclusive lock on cl->cl_sem */
list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
if (sp->so_generation - generation >= 0)
continue;
atomic_inc(&sp->so_count);
spin_unlock(&clp->cl_lock);
down(&sp->so_sema);
if (sp->so_generation - generation < 0) {
smp_rmb();
sp->so_generation = clp->cl_generation;
status = nfs4_reclaim_open_state(sp);
}
up(&sp->so_sema);
nfs4_put_state_owner(sp);
status = nfs4_reclaim_open_state(sp);
if (status < 0) {
if (status == -NFS4ERR_STALE_CLIENTID)
nfs4_schedule_state_recovery(clp);
goto out;
goto restart_loop;
goto out_error;
}
goto restart_loop;
}
spin_unlock(&clp->cl_lock);
out:
smp_mb__before_clear_bit();
clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state);
smp_mb__after_clear_bit();
set_bit(NFS4CLNT_OK, &clp->cl_state);
up_write(&clp->cl_sem);
wake_up_all(&clp->cl_waitq);
rpc_wake_up(&clp->cl_rpcwaitq);
nfs4_put_client(clp);
return 0;
out_error:
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n",
NIPQUAD(clp->cl_addr.s_addr));
up_read(&clp->cl_sem);
printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
NIPQUAD(clp->cl_addr.s_addr), -status);
goto out;
}
......
......@@ -507,8 +507,6 @@ struct idmap;
enum nfs4_client_state {
NFS4CLNT_OK = 0,
NFS4CLNT_NEW,
NFS4CLNT_SETUP_STATE,
};
/*
......@@ -520,7 +518,6 @@ struct nfs4_client {
u64 cl_clientid; /* constant */
nfs4_verifier cl_confirm;
unsigned long cl_state;
long cl_generation;
u32 cl_lockowner_id;
......@@ -573,9 +570,7 @@ struct nfs4_state_owner {
u32 so_id; /* 32-bit identifier, unique */
struct semaphore so_sema;
u32 so_seqid; /* protected by so_sema */
unsigned int so_flags; /* protected by so_sema */
atomic_t so_count;
long so_generation;
struct rpc_cred *so_cred; /* Associated cred */
struct list_head so_states;
......@@ -643,8 +638,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int _nfs4_do_close(struct inode *, struct nfs4_state *);
extern int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
......@@ -660,6 +655,7 @@ extern void init_nfsv4_state(struct nfs_server *);
extern void destroy_nfsv4_state(struct nfs_server *);
extern struct nfs4_client *nfs4_get_client(struct in_addr *);
extern void nfs4_put_client(struct nfs4_client *clp);
extern int nfs4_init_client(struct nfs4_client *clp);
extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment