Commit 101688f5 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfs-for-4.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client bugfixes from Trond Myklebust:
 "Highlights include:

  Stable patches:
   - fix v4.2 SEEK on files over 2 gigs
   - Fix a layout segment reference leak when pNFS I/O falls back to inband I/O.
   - Fix recovery of recalled read delegations

  Bugfixes:
   - Fix a case where NFSv4 fails to send CLOSE after a server reboot
   - Fix sunrpc to wait for connections to complete before retrying
   - Fix sunrpc races between transport connect/disconnect and shutdown
   - Fix an infinite loop when layoutget fails with BAD_STATEID
   - nfs/filelayout: Fix NULL reference caused by double freeing of fh_array
   - Fix a bogus WARN_ON_ONCE() in O_DIRECT when layout commit_through_mds is set
   - Fix layoutreturn/close ordering issues"

* tag 'nfs-for-4.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  NFS41: make close wait for layoutreturn
  NFS: Skip checking ds_cinfo.buckets when lseg's commit_through_mds is set
  NFSv4.x/pnfs: Don't try to recover stateids twice in layoutget
  NFSv4: Recovery of recalled read delegations is broken
  NFS: Fix an infinite loop when layoutget fail with BAD_STATEID
  NFS: Do cleanup before resetting pageio read/write to mds
  SUNRPC: xs_sock_mark_closed() does not need to trigger socket autoclose
  SUNRPC: Lock the transport layer on shutdown
  nfs/filelayout: Fix NULL reference caused by double freeing of fh_array
  SUNRPC: Ensure that we wait for connections to complete before retrying
  SUNRPC: drop null test before destroy functions
  nfs: fix v4.2 SEEK on files over 2 gigs
  SUNRPC: Fix races between socket connection and destroy code
  nfs: fix pg_test page count calculation
  Failing to send a CLOSE if file is opened WRONLY and server reboots on a 4.x mount
parents ddff42e5 500d701f
...@@ -113,7 +113,8 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ ...@@ -113,7 +113,8 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
return status; return status;
} }
static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) static int nfs_delegation_claim_opens(struct inode *inode,
const nfs4_stateid *stateid, fmode_t type)
{ {
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx; struct nfs_open_context *ctx;
...@@ -140,7 +141,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s ...@@ -140,7 +141,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
/* Block nfs4_proc_unlck */ /* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex); mutex_lock(&sp->so_delegreturn_mutex);
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid); err = nfs4_open_delegation_recall(ctx, state, stateid, type);
if (!err) if (!err)
err = nfs_delegation_claim_locks(ctx, state, stateid); err = nfs_delegation_claim_locks(ctx, state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
...@@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation ...@@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
do { do {
if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
break; break;
err = nfs_delegation_claim_opens(inode, &delegation->stateid); err = nfs_delegation_claim_opens(inode, &delegation->stateid,
delegation->type);
if (!issync || err != -EAGAIN) if (!issync || err != -EAGAIN)
break; break;
/* /*
......
...@@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); ...@@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */ /* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
......
...@@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq, ...@@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq,
struct nfs_writeverf *verfp = &dreq->verf; struct nfs_writeverf *verfp = &dreq->verf;
#ifdef CONFIG_NFS_V4_1 #ifdef CONFIG_NFS_V4_1
if (ds_clp) { /*
/* pNFS is in use, use the DS verf */ * pNFS is in use, use the DS verf except commit_through_mds is set
* for layout segment where nbuckets is zero.
*/
if (ds_clp && dreq->ds_cinfo.nbuckets > 0) {
if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets)
verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
else else
......
...@@ -629,23 +629,18 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, ...@@ -629,23 +629,18 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out; goto out;
} }
static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{ {
int i; int i;
for (i = 0; i < fl->num_fh; i++) { if (fl->fh_array) {
if (!fl->fh_array[i]) for (i = 0; i < fl->num_fh; i++) {
break; if (!fl->fh_array[i])
kfree(fl->fh_array[i]); break;
kfree(fl->fh_array[i]);
}
kfree(fl->fh_array);
} }
kfree(fl->fh_array);
fl->fh_array = NULL;
}
static void
_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
filelayout_free_fh_array(fl);
kfree(fl); kfree(fl);
} }
...@@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, ...@@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
/* Do we want to use a mempool here? */ /* Do we want to use a mempool here? */
fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
if (!fl->fh_array[i]) if (!fl->fh_array[i])
goto out_err_free; goto out_err;
p = xdr_inline_decode(&stream, 4); p = xdr_inline_decode(&stream, 4);
if (unlikely(!p)) if (unlikely(!p))
goto out_err_free; goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++); fl->fh_array[i]->size = be32_to_cpup(p++);
if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n", printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size); i, fl->fh_array[i]->size);
goto out_err_free; goto out_err;
} }
p = xdr_inline_decode(&stream, fl->fh_array[i]->size); p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
if (unlikely(!p)) if (unlikely(!p))
goto out_err_free; goto out_err;
memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
dprintk("DEBUG: %s: fh len %d\n", __func__, dprintk("DEBUG: %s: fh len %d\n", __func__,
fl->fh_array[i]->size); fl->fh_array[i]->size);
...@@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, ...@@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
__free_page(scratch); __free_page(scratch);
return 0; return 0;
out_err_free:
filelayout_free_fh_array(fl);
out_err: out_err:
__free_page(scratch); __free_page(scratch);
return -EIO; return -EIO;
......
...@@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) ...@@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{ {
struct nfs_server *server = NFS_SERVER(file_inode(filep)); struct nfs_server *server = NFS_SERVER(file_inode(filep));
struct nfs4_exception exception = { }; struct nfs4_exception exception = { };
int err; loff_t err;
do { do {
err = _nfs42_proc_llseek(filep, offset, whence); err = _nfs42_proc_llseek(filep, offset, whence);
if (err >= 0)
break;
if (err == -ENOTSUPP) if (err == -ENOTSUPP)
return -EOPNOTSUPP; return -EOPNOTSUPP;
err = nfs4_handle_exception(server, err, &exception); err = nfs4_handle_exception(server, err, &exception);
......
...@@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) ...@@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
return ret; return ret;
} }
static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
fmode_t fmode)
{
switch(fmode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ|FMODE_WRITE:
return state->n_rdwr != 0;
case FMODE_WRITE:
return state->n_wronly != 0;
case FMODE_READ:
return state->n_rdonly != 0;
}
WARN_ON_ONCE(1);
return false;
}
static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
{ {
int ret = 0; int ret = 0;
...@@ -1571,17 +1586,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context ...@@ -1571,17 +1586,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
return opendata; return opendata;
} }
static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
fmode_t fmode)
{ {
struct nfs4_state *newstate; struct nfs4_state *newstate;
int ret; int ret;
if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
(opendata->o_arg.u.delegation_type & fmode) != fmode)
/* This mode can't have been delegated, so we must have
* a valid open_stateid to cover it - not need to reclaim.
*/
return 0; return 0;
opendata->o_arg.open_flags = 0; opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode; opendata->o_arg.fmode = fmode;
...@@ -1597,14 +1608,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod ...@@ -1597,14 +1608,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
newstate = nfs4_opendata_to_nfs4_state(opendata); newstate = nfs4_opendata_to_nfs4_state(opendata);
if (IS_ERR(newstate)) if (IS_ERR(newstate))
return PTR_ERR(newstate); return PTR_ERR(newstate);
if (newstate != opendata->state)
ret = -ESTALE;
nfs4_close_state(newstate, fmode); nfs4_close_state(newstate, fmode);
*res = newstate; return ret;
return 0;
} }
static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
{ {
struct nfs4_state *newstate;
int ret; int ret;
/* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
...@@ -1615,27 +1626,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * ...@@ -1615,27 +1626,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
clear_bit(NFS_DELEGATED_STATE, &state->flags); clear_bit(NFS_DELEGATED_STATE, &state->flags);
clear_bit(NFS_OPEN_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags);
smp_rmb(); smp_rmb();
if (state->n_rdwr != 0) { ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0)
if (ret != 0) return ret;
return ret; ret = nfs4_open_recover_helper(opendata, FMODE_WRITE);
if (newstate != state) if (ret != 0)
return -ESTALE; return ret;
} ret = nfs4_open_recover_helper(opendata, FMODE_READ);
if (state->n_wronly != 0) { if (ret != 0)
ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); return ret;
if (ret != 0)
return ret;
if (newstate != state)
return -ESTALE;
}
if (state->n_rdonly != 0) {
ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
if (ret != 0)
return ret;
if (newstate != state)
return -ESTALE;
}
/* /*
* We may have performed cached opens for all three recoveries. * We may have performed cached opens for all three recoveries.
* Check if we need to update the current stateid. * Check if we need to update the current stateid.
...@@ -1759,18 +1758,32 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct ...@@ -1759,18 +1758,32 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
return err; return err;
} }
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
struct nfs4_state *state, const nfs4_stateid *stateid,
fmode_t type)
{ {
struct nfs_server *server = NFS_SERVER(state->inode); struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_opendata *opendata; struct nfs4_opendata *opendata;
int err; int err = 0;
opendata = nfs4_open_recoverdata_alloc(ctx, state, opendata = nfs4_open_recoverdata_alloc(ctx, state,
NFS4_OPEN_CLAIM_DELEG_CUR_FH); NFS4_OPEN_CLAIM_DELEG_CUR_FH);
if (IS_ERR(opendata)) if (IS_ERR(opendata))
return PTR_ERR(opendata); return PTR_ERR(opendata);
nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
err = nfs4_open_recover(opendata, state); clear_bit(NFS_DELEGATED_STATE, &state->flags);
switch (type & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ|FMODE_WRITE:
case FMODE_WRITE:
err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
if (err)
break;
err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
if (err)
break;
case FMODE_READ:
err = nfs4_open_recover_helper(opendata, FMODE_READ);
}
nfs4_opendata_put(opendata); nfs4_opendata_put(opendata);
return nfs4_handle_delegation_recall_error(server, state, stateid, err); return nfs4_handle_delegation_recall_error(server, state, stateid, err);
} }
...@@ -2645,6 +2658,15 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, ...@@ -2645,6 +2658,15 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
return err; return err;
} }
static bool
nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
{
if (inode == NULL || !nfs_have_layout(inode))
return false;
return pnfs_wait_on_layoutreturn(inode, task);
}
struct nfs4_closedata { struct nfs4_closedata {
struct inode *inode; struct inode *inode;
struct nfs4_state *state; struct nfs4_state *state;
...@@ -2763,6 +2785,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) ...@@ -2763,6 +2785,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_no_action; goto out_no_action;
} }
if (nfs4_wait_on_layoutreturn(inode, task)) {
nfs_release_seqid(calldata->arg.seqid);
goto out_wait;
}
if (calldata->arg.fmode == 0) if (calldata->arg.fmode == 0)
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
if (calldata->roc) if (calldata->roc)
...@@ -5308,6 +5335,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) ...@@ -5308,6 +5335,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data; d_data = (struct nfs4_delegreturndata *)data;
if (nfs4_wait_on_layoutreturn(d_data->inode, task))
return;
if (d_data->roc) if (d_data->roc)
pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
...@@ -7800,39 +7830,46 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) ...@@ -7800,39 +7830,46 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
__func__, delay); __func__, delay);
rpc_delay(task, delay); rpc_delay(task, delay);
task->tk_status = 0; /* Do not call nfs4_async_handle_error() */
rpc_restart_call_prepare(task); goto out_restart;
goto out; /* Do not call nfs4_async_handle_error() */
} }
break; break;
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID: case -NFS4ERR_BAD_STATEID:
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout; if (nfs4_stateid_match(&lgp->args.stateid,
if (!lo || list_empty(&lo->plh_segs)) { &lgp->args.ctx->state->stateid)) {
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
/* If the open stateid was bad, then recover it. */ /* If the open stateid was bad, then recover it. */
state = lgp->args.ctx->state; state = lgp->args.ctx->state;
} else { break;
}
lo = NFS_I(inode)->layout;
if (lo && nfs4_stateid_match(&lgp->args.stateid,
&lo->plh_stateid)) {
LIST_HEAD(head); LIST_HEAD(head);
/* /*
* Mark the bad layout state as invalid, then retry * Mark the bad layout state as invalid, then retry
* with the current stateid. * with the current stateid.
*/ */
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head); pnfs_free_lseg_list(&head);
} else
task->tk_status = 0; spin_unlock(&inode->i_lock);
rpc_restart_call_prepare(task); goto out_restart;
}
} }
if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
rpc_restart_call_prepare(task); goto out_restart;
out: out:
dprintk("<-- %s\n", __func__); dprintk("<-- %s\n", __func__);
return; return;
out_restart:
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
out_overflow: out_overflow:
task->tk_status = -EOVERFLOW; task->tk_status = -EOVERFLOW;
goto out; goto out;
......
...@@ -1481,7 +1481,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs ...@@ -1481,7 +1481,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
spin_unlock(&state->state_lock); spin_unlock(&state->state_lock);
} }
nfs4_put_open_state(state); nfs4_put_open_state(state);
clear_bit(NFS4CLNT_RECLAIM_NOGRACE, clear_bit(NFS_STATE_RECLAIM_NOGRACE,
&state->flags); &state->flags);
spin_lock(&sp->so_lock); spin_lock(&sp->so_lock);
goto restart; goto restart;
......
...@@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, ...@@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
* for it without upsetting the slab allocator. * for it without upsetting the slab allocator.
*/ */
if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) *
sizeof(struct page) > PAGE_SIZE) sizeof(struct page *) > PAGE_SIZE)
return 0; return 0;
return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes);
......
...@@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino) ...@@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino)
mark_lseg_invalid(lseg, &tmp_list); mark_lseg_invalid(lseg, &tmp_list);
found = true; found = true;
} }
/* pnfs_prepare_layoutreturn() grabs lo ref and it will be put /* ROC in two conditions:
* in pnfs_roc_release(). We don't really send a layoutreturn but
* still want others to view us like we are sending one!
*
* If pnfs_prepare_layoutreturn() fails, it means someone else is doing
* LAYOUTRETURN, so we proceed like there are no layouts to return.
*
* ROC in three conditions:
* 1. there are ROC lsegs * 1. there are ROC lsegs
* 2. we don't send layoutreturn * 2. we don't send layoutreturn
* 3. no others are sending layoutreturn
*/ */
if (found && !layoutreturn && pnfs_prepare_layoutreturn(lo)) if (found && !layoutreturn) {
/* lo ref dropped in pnfs_roc_release() */
pnfs_get_layout_hdr(lo);
roc = true; roc = true;
}
out_noroc: out_noroc:
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
...@@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) ...@@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
} }
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_hdr *lo;
bool sleep = false;
/* we might not have grabbed lo reference. so need to check under
* i_lock */
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
sleep = true;
spin_unlock(&ino->i_lock);
if (sleep)
rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
return sleep;
}
/* /*
* Compare two layout segments for sorting into layout cache. * Compare two layout segments for sorting into layout cache.
* We want to preferentially return RW over RO layouts, so ensure those * We want to preferentially return RW over RO layouts, so ensure those
......
...@@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino); ...@@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino); void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier);
bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task);
void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
...@@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) ...@@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
{ {
} }
static inline bool
pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
return false;
}
static inline void set_pnfs_layoutdriver(struct nfs_server *s, static inline void set_pnfs_layoutdriver(struct nfs_server *s,
const struct nfs_fh *mntfh, u32 id) const struct nfs_fh *mntfh, u32 id)
{ {
......
...@@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) ...@@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{ {
struct nfs_pgio_mirror *mirror; struct nfs_pgio_mirror *mirror;
if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
pgio->pg_ops->pg_cleanup(pgio);
pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_ops = &nfs_pgio_rw_ops;
/* read path should never have more than one mirror */ /* read path should never have more than one mirror */
......
...@@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) ...@@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{ {
struct nfs_pgio_mirror *mirror; struct nfs_pgio_mirror *mirror;
if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
pgio->pg_ops->pg_cleanup(pgio);
pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_ops = &nfs_pgio_rw_ops;
nfs_pageio_stop_mirroring(pgio); nfs_pageio_stop_mirroring(pgio);
......
...@@ -42,6 +42,7 @@ struct sock_xprt { ...@@ -42,6 +42,7 @@ struct sock_xprt {
/* /*
* Connection of transports * Connection of transports
*/ */
unsigned long sock_state;
struct delayed_work connect_worker; struct delayed_work connect_worker;
struct sockaddr_storage srcaddr; struct sockaddr_storage srcaddr;
unsigned short srcport; unsigned short srcport;
...@@ -76,6 +77,8 @@ struct sock_xprt { ...@@ -76,6 +77,8 @@ struct sock_xprt {
*/ */
#define TCP_RPC_REPLY (1UL << 6) #define TCP_RPC_REPLY (1UL << 6)
#define XPRT_SOCK_CONNECTING 1U
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_XPRTSOCK_H */ #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
...@@ -1092,14 +1092,10 @@ void ...@@ -1092,14 +1092,10 @@ void
rpc_destroy_mempool(void) rpc_destroy_mempool(void)
{ {
rpciod_stop(); rpciod_stop();
if (rpc_buffer_mempool) mempool_destroy(rpc_buffer_mempool);
mempool_destroy(rpc_buffer_mempool); mempool_destroy(rpc_task_mempool);
if (rpc_task_mempool) kmem_cache_destroy(rpc_task_slabp);
mempool_destroy(rpc_task_mempool); kmem_cache_destroy(rpc_buffer_slabp);
if (rpc_task_slabp)
kmem_cache_destroy(rpc_task_slabp);
if (rpc_buffer_slabp)
kmem_cache_destroy(rpc_buffer_slabp);
rpc_destroy_wait_queue(&delay_queue); rpc_destroy_wait_queue(&delay_queue);
} }
......
...@@ -614,6 +614,7 @@ static void xprt_autoclose(struct work_struct *work) ...@@ -614,6 +614,7 @@ static void xprt_autoclose(struct work_struct *work)
clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt); xprt->ops->close(xprt);
xprt_release_write(xprt, NULL); xprt_release_write(xprt, NULL);
wake_up_bit(&xprt->state, XPRT_LOCKED);
} }
/** /**
...@@ -723,6 +724,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) ...@@ -723,6 +724,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
xprt->ops->release_xprt(xprt, NULL); xprt->ops->release_xprt(xprt, NULL);
out: out:
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
} }
/** /**
...@@ -1394,6 +1396,10 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) ...@@ -1394,6 +1396,10 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
static void xprt_destroy(struct rpc_xprt *xprt) static void xprt_destroy(struct rpc_xprt *xprt)
{ {
dprintk("RPC: destroying transport %p\n", xprt); dprintk("RPC: destroying transport %p\n", xprt);
/* Exclude transport connect/disconnect handlers */
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
del_timer_sync(&xprt->timer); del_timer_sync(&xprt->timer);
rpc_xprt_debugfs_unregister(xprt); rpc_xprt_debugfs_unregister(xprt);
......
...@@ -777,7 +777,6 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) ...@@ -777,7 +777,6 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
xs_sock_reset_connection_flags(xprt); xs_sock_reset_connection_flags(xprt);
/* Mark transport as closed and wake up all pending tasks */ /* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
xprt_force_disconnect(xprt);
} }
/** /**
...@@ -881,8 +880,11 @@ static void xs_xprt_free(struct rpc_xprt *xprt) ...@@ -881,8 +880,11 @@ static void xs_xprt_free(struct rpc_xprt *xprt)
*/ */
static void xs_destroy(struct rpc_xprt *xprt) static void xs_destroy(struct rpc_xprt *xprt)
{ {
struct sock_xprt *transport = container_of(xprt,
struct sock_xprt, xprt);
dprintk("RPC: xs_destroy xprt %p\n", xprt); dprintk("RPC: xs_destroy xprt %p\n", xprt);
cancel_delayed_work_sync(&transport->connect_worker);
xs_close(xprt); xs_close(xprt);
xs_xprt_free(xprt); xs_xprt_free(xprt);
module_put(THIS_MODULE); module_put(THIS_MODULE);
...@@ -1435,6 +1437,7 @@ static void xs_tcp_data_ready(struct sock *sk) ...@@ -1435,6 +1437,7 @@ static void xs_tcp_data_ready(struct sock *sk)
static void xs_tcp_state_change(struct sock *sk) static void xs_tcp_state_change(struct sock *sk)
{ {
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
struct sock_xprt *transport;
read_lock_bh(&sk->sk_callback_lock); read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk))) if (!(xprt = xprt_from_sock(sk)))
...@@ -1446,13 +1449,12 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1446,13 +1449,12 @@ static void xs_tcp_state_change(struct sock *sk)
sock_flag(sk, SOCK_ZAPPED), sock_flag(sk, SOCK_ZAPPED),
sk->sk_shutdown); sk->sk_shutdown);
transport = container_of(xprt, struct sock_xprt, xprt);
trace_rpc_socket_state_change(xprt, sk->sk_socket); trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) { switch (sk->sk_state) {
case TCP_ESTABLISHED: case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock); spin_lock(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) { if (!xprt_test_and_set_connected(xprt)) {
struct sock_xprt *transport = container_of(xprt,
struct sock_xprt, xprt);
/* Reset TCP record info */ /* Reset TCP record info */
transport->tcp_offset = 0; transport->tcp_offset = 0;
...@@ -1461,6 +1463,8 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1461,6 +1463,8 @@ static void xs_tcp_state_change(struct sock *sk)
transport->tcp_flags = transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
xprt->connect_cookie++; xprt->connect_cookie++;
clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, -EAGAIN); xprt_wake_pending_tasks(xprt, -EAGAIN);
} }
...@@ -1496,6 +1500,9 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1496,6 +1500,9 @@ static void xs_tcp_state_change(struct sock *sk)
smp_mb__after_atomic(); smp_mb__after_atomic();
break; break;
case TCP_CLOSE: case TCP_CLOSE:
if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
&transport->sock_state))
xprt_clear_connecting(xprt);
xs_sock_mark_closed(xprt); xs_sock_mark_closed(xprt);
} }
out: out:
...@@ -2179,6 +2186,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ...@@ -2179,6 +2186,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* Tell the socket layer to start connecting... */ /* Tell the socket layer to start connecting... */
xprt->stat.connect_count++; xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies; xprt->stat.connect_start = jiffies;
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
switch (ret) { switch (ret) {
case 0: case 0:
...@@ -2240,7 +2248,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) ...@@ -2240,7 +2248,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EINPROGRESS: case -EINPROGRESS:
case -EALREADY: case -EALREADY:
xprt_unlock_connect(xprt, transport); xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
return; return;
case -EINVAL: case -EINVAL:
/* Happens, for instance, if the user specified a link /* Happens, for instance, if the user specified a link
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment