Commit 22368ff1 authored by Olga Kornievskaia's avatar Olga Kornievskaia Committed by Anna Schumaker

PNFS for stateid errors retry against MDS first

Upon receiving a stateid error such as BAD_STATEID, the client
should retry the operation against the MDS before deciding to
do stateid recovery.

Previously, the code would initiate state recovery and it could
lead to a race in a state manager that could chose an incorrect
recovery method which would lead to the EIO failure for the
application.
Signed-off-by: default avatarOlga Kornievskaia <kolga@netapp.com>
Signed-off-by: default avatarAnna Schumaker <Anna.Schumaker@Netapp.com>
parent a0bc01e0
...@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task, ...@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
{ {
struct pnfs_layout_hdr *lo = lseg->pls_layout; struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode; struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
if (task->tk_status >= 0) if (task->tk_status >= 0)
return 0; return 0;
switch (task->tk_status) { switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */ /* DS session errors */
case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT: case -NFS4ERR_BADSLOT:
...@@ -212,17 +193,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, ...@@ -212,17 +193,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status); task->tk_status);
return -NFS4ERR_RESET_TO_MDS; return -NFS4ERR_RESET_TO_MDS;
} }
out:
task->tk_status = 0; task->tk_status = 0;
return -EAGAIN; return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
} }
/* NFS_PROTO call done callback routines */ /* NFS_PROTO call done callback routines */
......
...@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, ...@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
{ {
struct pnfs_layout_hdr *lo = lseg->pls_layout; struct pnfs_layout_hdr *lo = lseg->pls_layout;
struct inode *inode = lo->plh_inode; struct inode *inode = lo->plh_inode;
struct nfs_server *mds_server = NFS_SERVER(inode);
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs_client *mds_client = mds_server->nfs_client;
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) { switch (task->tk_status) {
/* MDS state errors */
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
if (state == NULL)
break;
nfs_remove_bad_delegation(state->inode, NULL);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
goto out_bad_stateid;
}
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT: case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_BAD_HIGH_SLOT:
...@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, ...@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
task->tk_status); task->tk_status);
return -NFS4ERR_RESET_TO_MDS; return -NFS4ERR_RESET_TO_MDS;
} }
out:
task->tk_status = 0; task->tk_status = 0;
return -EAGAIN; return -EAGAIN;
out_bad_stateid:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
goto out;
} }
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment