Commit 7f27392c authored by Trond Myklebust

pNFS: Fix races between return-on-close and layoutreturn.

If one or more of the layout segments reports an error during I/O, then
we may have to send a layoutreturn to report the error back to the NFS
metadata server.
This patch ensures that the return-on-close code can detect the
outstanding layoutreturn, and not preempt it.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
parent df9cecc1
...@@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata) ...@@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
pnfs_clear_layoutreturn_waitbit(lo); pnfs_clear_layoutreturn_waitbit(lo);
clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
lo->plh_block_lgets--; lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock); spin_unlock(&lo->plh_inode->i_lock);
pnfs_free_lseg_list(&freeme); pnfs_free_lseg_list(&freeme);
......
...@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, ...@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
{ {
struct pnfs_layout_segment *s; struct pnfs_layout_segment *s;
if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
return false; return false;
list_for_each_entry(s, &lo->plh_segs, pls_list) list_for_each_entry(s, &lo->plh_segs, pls_list)
...@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, ...@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
return true; return true;
} }
/*
 * Try to claim the NFS_LAYOUT_RETURN flag on @lo before a layoutreturn
 * is sent.
 *
 * Returns true if this call set the flag, in which case the pending
 * return iomode is reset, plh_block_lgets is bumped,
 * pnfs_get_layout_hdr() is called on @lo, and
 * NFS_LAYOUT_RETURN_BEFORE_CLOSE is cleared.  Returns false, leaving
 * everything else untouched, if the flag was already set.
 *
 * NOTE(review): the callers visible in this change invoke this before
 * dropping inode->i_lock; presumably that lock is required -- confirm.
 */
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
	/* Atomic test-and-set: only one caller can win the flag. */
	bool claimed = !test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);

	if (claimed) {
		lo->plh_return_iomode = 0;
		lo->plh_block_lgets++;
		pnfs_get_layout_hdr(lo);
		clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
	}
	return claimed;
}
static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
struct pnfs_layout_hdr *lo, struct inode *inode) struct pnfs_layout_hdr *lo, struct inode *inode)
{ {
...@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, ...@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
if (pnfs_layout_need_return(lo, lseg)) { if (pnfs_layout_need_return(lo, lseg)) {
nfs4_stateid stateid; nfs4_stateid stateid;
enum pnfs_iomode iomode; enum pnfs_iomode iomode;
bool send;
stateid = lo->plh_stateid; stateid = lo->plh_stateid;
iomode = lo->plh_return_iomode; iomode = lo->plh_return_iomode;
/* decreased in pnfs_send_layoutreturn() */ send = pnfs_prepare_layoutreturn(lo);
lo->plh_block_lgets++;
lo->plh_return_iomode = 0;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
pnfs_get_layout_hdr(lo); if (send) {
/* Send an async layoutreturn so we dont deadlock */
/* Send an async layoutreturn so we dont deadlock */ pnfs_send_layoutreturn(lo, stateid, iomode, false);
pnfs_send_layoutreturn(lo, stateid, iomode, false); }
} else } else
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
...@@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) ...@@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
smp_mb__after_atomic(); smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
} }
static int static int
...@@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino) ...@@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino)
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
nfs4_stateid stateid; nfs4_stateid stateid;
int status = 0, empty; int status = 0, empty;
bool send;
dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
...@@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino) ...@@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino)
/* Don't send a LAYOUTRETURN if list was initially empty */ /* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) { if (empty) {
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_put_layout_hdr(lo);
dprintk("NFS: %s no layout segments to return\n", __func__); dprintk("NFS: %s no layout segments to return\n", __func__);
goto out; goto out_put_layout_hdr;
} }
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
lo->plh_block_lgets++; send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list); pnfs_free_lseg_list(&tmp_list);
if (send)
status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
out_put_layout_hdr:
pnfs_put_layout_hdr(lo);
out: out:
dprintk("<-- %s status: %d\n", __func__, status); dprintk("<-- %s status: %d\n", __func__, status);
return status; return status;
...@@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino) ...@@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino)
out_noroc: out_noroc:
if (lo) { if (lo) {
stateid = lo->plh_stateid; stateid = lo->plh_stateid;
layoutreturn = if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags))
&lo->plh_flags); layoutreturn = pnfs_prepare_layoutreturn(lo);
if (layoutreturn) {
lo->plh_block_lgets++;
pnfs_get_layout_hdr(lo);
}
} }
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
if (layoutreturn) { if (layoutreturn) {
...@@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) ...@@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
*/ */
*barrier = current_seqid + atomic_read(&lo->plh_outstanding); *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
stateid = lo->plh_stateid; stateid = lo->plh_stateid;
layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lo->plh_flags); &lo->plh_flags))
if (layoutreturn) { layoutreturn = pnfs_prepare_layoutreturn(lo);
lo->plh_block_lgets++; if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
pnfs_get_layout_hdr(lo); rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
}
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
if (layoutreturn) { if (layoutreturn) {
rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
return true; return true;
} }
...@@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, ...@@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
/* set failure bit so that pnfs path will be retried later */ /* set failure bit so that pnfs path will be retried later */
pnfs_layout_set_fail_bit(lo, iomode); pnfs_layout_set_fail_bit(lo, iomode);
set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
if (lo->plh_return_iomode == 0) if (lo->plh_return_iomode == 0)
lo->plh_return_iomode = range.iomode; lo->plh_return_iomode = range.iomode;
else if (lo->plh_return_iomode != range.iomode) else if (lo->plh_return_iomode != range.iomode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment