Commit 75f26df6 authored by Linus Torvalds

Merge tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fix a regression in the SunRPC socket polling code
   - Fix the attribute cache revalidation code
   - Fix race in __update_open_stateid()
   - Fix an lo->plh_block_lgets imbalance in layoutreturn
   - Fix an Oopsable typo in ff_mirror_match_fh()

  Features:
   - pNFS layout recall performance improvements.
   - pNFS/flexfiles: Support server-supplied layoutstats sampling period

  Bugfixes + cleanups:
   - NFSv4: Don't perform cached access checks before we've OPENed the
     file
   - Fix starvation issues with background flushes
   - Reclaim writes should be flushed as unstable writes if there are
     already entries in the commit lists
   - Various bugfixes from Chuck to fix NFS/RDMA send queue ordering
     problems
   - Ensure that we propagate fatal layoutget errors back to the
     application
   - Fixes for sundry flexfiles layoutstats bugs
   - Fix files/flexfiles to not cache invalidated layouts in the DS
     commit buckets"

* tag 'nfs-for-4.5-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (68 commits)
  NFS: Fix a compile warning about unused variable in nfs_generic_pg_pgios()
  NFSv4: Fix a compile warning about no prototype for nfs4_ioctl()
  NFS: Use wait_on_atomic_t() for unlock after readahead
  SUNRPC: Fixup socket wait for memory
  NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
  NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
  NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
  NFSv4.1/pNFS: Fix a race in initiate_file_draining()
  NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
  NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
  NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
  NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
  NFS: Relax requirements in nfs_flush_incompatible
  NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
  NFS: Allow multiple commit requests in flight per file
  NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
  SUNRPC: Fix a missing break in rpc_anyaddr()
  pNFS/flexfiles: Fix an Oopsable typo in ff_mirror_match_fh()
  NFS: Fix attribute cache revalidation
  NFS: Ensure we revalidate attributes before using execute_ok()
  ...
parents 63f729cb 44aab3e0
...@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, ...@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
res = htonl(NFS4ERR_BADHANDLE); res = htonl(NFS4ERR_BADHANDLE);
inode = nfs_delegation_find_inode(cps->clp, &args->fh); inode = nfs_delegation_find_inode(cps->clp, &args->fh);
if (inode == NULL) if (inode == NULL) {
trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
&args->stateid, -ntohl(res));
goto out; goto out;
}
/* Set up a helper thread to actually return the delegation */ /* Set up a helper thread to actually return the delegation */
switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0: case 0:
...@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, ...@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
default: default:
res = htonl(NFS4ERR_RESOURCE); res = htonl(NFS4ERR_RESOURCE);
} }
trace_nfs4_recall_delegation(inode, -ntohl(res)); trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
&args->stateid, -ntohl(res));
iput(inode); iput(inode);
out: out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
...@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, ...@@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
return lo; return lo;
} }
/*
 * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
 *
 * Compares the seqid of the stateid passed in @new against the seqid
 * currently recorded in the layout header @lo. Both are converted from
 * big-endian before comparison.
 *
 * Returns true when the new seqid is at most one ahead of the recorded
 * seqid, false when it is further ahead than that.
 *
 * NOTE(review): the comparison is plain u32 arithmetic, so "recorded + 1"
 * wraps to 0 when the recorded seqid is at its maximum - confirm that
 * seqid wraparound is handled as intended.
 */
static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
		const nfs4_stateid *new)
{
	const u32 recorded_seq = be32_to_cpu(lo->plh_stateid.seqid);
	const u32 incoming_seq = be32_to_cpu(new->seqid);

	return incoming_seq <= recorded_seq + 1;
}
static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args) struct cb_layoutrecallargs *args)
{ {
...@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp, ...@@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list); LIST_HEAD(free_me_list);
lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
if (!lo) if (!lo) {
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
&args->cbl_stateid, -rv);
goto out; goto out;
}
ino = lo->plh_inode; ino = lo->plh_inode;
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
rv = NFS4ERR_DELAY;
goto unlock;
}
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_layoutcommit_inode(ino, false); pnfs_layoutcommit_inode(ino, false);
spin_lock(&ino->i_lock); spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || /*
pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
&args->cbl_range)) { */
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
rv = NFS4ERR_DELAY; rv = NFS4ERR_DELAY;
goto unlock; goto unlock;
} }
if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range)) {
rv = NFS4_OK;
goto unlock;
}
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range); &args->cbl_range);
} }
pnfs_mark_layout_returned_if_empty(lo);
unlock: unlock:
spin_unlock(&ino->i_lock); spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list); pnfs_free_lseg_list(&free_me_list);
/* Free all lsegs that are attached to commit buckets */
nfs_commit_inode(ino, 0);
pnfs_put_layout_hdr(lo); pnfs_put_layout_hdr(lo);
trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
&args->cbl_stateid, -rv);
iput(ino); iput(ino);
out: out:
return rv; return rv;
......
...@@ -2431,6 +2431,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) ...@@ -2431,6 +2431,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
} }
EXPORT_SYMBOL_GPL(nfs_may_open); EXPORT_SYMBOL_GPL(nfs_may_open);
/*
 * Revalidate the inode's attributes and then check execute permission.
 *
 * When MAY_NOT_BLOCK is set in @mask the RCU flavour of the revalidation
 * helper is used, otherwise the regular one.
 *
 * Returns the revalidation error if revalidation failed, -EACCES if
 * revalidation succeeded but execute_ok() rejects the inode, 0 otherwise.
 */
static int nfs_execute_ok(struct inode *inode, int mask)
{
	struct nfs_server *server = NFS_SERVER(inode);
	int err;

	err = (mask & MAY_NOT_BLOCK) ?
		nfs_revalidate_inode_rcu(server, inode) :
		nfs_revalidate_inode(server, inode);
	if (err != 0)
		return err;

	/* Attributes are fresh: the mode bits can now be trusted. */
	return execute_ok(inode) ? 0 : -EACCES;
}
int nfs_permission(struct inode *inode, int mask) int nfs_permission(struct inode *inode, int mask)
{ {
struct rpc_cred *cred; struct rpc_cred *cred;
...@@ -2448,6 +2462,9 @@ int nfs_permission(struct inode *inode, int mask) ...@@ -2448,6 +2462,9 @@ int nfs_permission(struct inode *inode, int mask)
case S_IFLNK: case S_IFLNK:
goto out; goto out;
case S_IFREG: case S_IFREG:
if ((mask & MAY_OPEN) &&
nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
return 0;
break; break;
case S_IFDIR: case S_IFDIR:
/* /*
...@@ -2480,8 +2497,8 @@ int nfs_permission(struct inode *inode, int mask) ...@@ -2480,8 +2497,8 @@ int nfs_permission(struct inode *inode, int mask)
res = PTR_ERR(cred); res = PTR_ERR(cred);
} }
out: out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) if (!res && (mask & MAY_EXEC))
res = -EACCES; res = nfs_execute_ok(inode, mask);
dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res); inode->i_sb->s_id, inode->i_ino, mask, res);
......
...@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) ...@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count); return atomic_dec_and_test(&dreq->io_count);
} }
/*
 * Flag a direct I/O request so that its writes are rescheduled.
 *
 * Overwrites dreq->flags with NFS_ODIRECT_RESCHED_WRITES (any previously
 * set flag value is replaced, not OR-ed). No lock is taken here.
 * NOTE(review): callers presumably serialise access to dreq->flags
 * themselves - confirm against the call sites.
 */
void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
{
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
static void static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{ {
...@@ -670,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) ...@@ -670,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
req = nfs_list_entry(reqs.next); req = nfs_list_entry(reqs.next);
nfs_direct_setup_mirroring(dreq, &desc, req); nfs_direct_setup_mirroring(dreq, &desc, req);
if (desc.pg_error < 0) {
list_splice_init(&reqs, &failed);
goto out_failed;
}
list_for_each_entry_safe(req, tmp, &reqs, wb_list) { list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) { if (!nfs_pageio_add_request(&desc, req)) {
...@@ -677,6 +675,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) ...@@ -677,6 +675,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_list_add_request(req, &failed); nfs_list_add_request(req, &failed);
spin_lock(cinfo.lock); spin_lock(cinfo.lock);
dreq->flags = 0; dreq->flags = 0;
if (desc.pg_error < 0)
dreq->error = desc.pg_error;
else
dreq->error = -EIO; dreq->error = -EIO;
spin_unlock(cinfo.lock); spin_unlock(cinfo.lock);
} }
...@@ -684,6 +685,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) ...@@ -684,6 +685,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
} }
nfs_pageio_complete(&desc); nfs_pageio_complete(&desc);
out_failed:
while (!list_empty(&failed)) { while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next); req = nfs_list_entry(failed.next);
nfs_list_remove_request(req); nfs_list_remove_request(req);
...@@ -727,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) ...@@ -727,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode); nfs_direct_write_complete(dreq, data->inode);
} }
static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
struct nfs_page *req)
{ {
/* There is no lock to clear */ struct nfs_direct_req *dreq = cinfo->dreq;
spin_lock(&dreq->lock);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&dreq->lock);
nfs_mark_request_commit(req, NULL, cinfo, 0);
} }
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete, .completion = nfs_direct_commit_complete,
.error_cleanup = nfs_direct_error_cleanup, .resched_write = nfs_direct_resched_write,
}; };
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
...@@ -839,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head) ...@@ -839,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
} }
} }
static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
spin_lock(&dreq->lock);
if (dreq->error == 0) {
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
hdr->good_bytes = hdr->args.count;
}
spin_unlock(&dreq->lock);
}
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.error_cleanup = nfs_write_sync_pgio_error, .error_cleanup = nfs_write_sync_pgio_error,
.init_hdr = nfs_direct_pgio_init, .init_hdr = nfs_direct_pgio_init,
.completion = nfs_direct_write_completion, .completion = nfs_direct_write_completion,
.reschedule_io = nfs_direct_write_reschedule_io,
}; };
...@@ -900,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, ...@@ -900,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
} }
nfs_direct_setup_mirroring(dreq, &desc, req); nfs_direct_setup_mirroring(dreq, &desc, req);
if (desc.pg_error < 0) {
nfs_free_request(req);
result = desc.pg_error;
break;
}
nfs_lock_request(req); nfs_lock_request(req);
req->wb_index = pos >> PAGE_SHIFT; req->wb_index = pos >> PAGE_SHIFT;
......
...@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page, ...@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
* so it will not block due to pages that will shortly be freeable. * so it will not block due to pages that will shortly be freeable.
*/ */
nfsi = NFS_I(mapping->host); nfsi = NFS_I(mapping->host);
if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { if (atomic_read(&nfsi->commit_info.rpcs_out)) {
*writeback = true; *writeback = true;
return; return;
} }
...@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page) ...@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page)
inode->i_ino, (long long)page_offset(page)); inode->i_ino, (long long)page_offset(page));
nfs_fscache_wait_on_page_write(nfsi, page); nfs_fscache_wait_on_page_write(nfsi, page);
return nfs_wb_page(inode, page); return nfs_wb_launder_page(inode, page);
} }
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
...@@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) ...@@ -756,7 +756,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) { if (!IS_ERR(l_ctx)) {
status = nfs_iocounter_wait(&l_ctx->io_count); status = nfs_iocounter_wait(l_ctx);
nfs_put_lock_context(l_ctx); nfs_put_lock_context(l_ctx);
if (status < 0) if (status < 0)
return status; return status;
......
...@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, ...@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status); task->tk_status);
nfs4_mark_deviceid_unavailable(devid); nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg); pnfs_error_mark_layout_for_return(inode, lseg);
pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq); rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */ /* fall through */
default: default:
...@@ -883,13 +884,19 @@ static void ...@@ -883,13 +884,19 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req) struct nfs_page *req)
{ {
if (!pgio->pg_lseg) if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context, req->wb_context,
0, 0,
NFS4_MAX_UINT64, NFS4_MAX_UINT64,
IOMODE_READ, IOMODE_READ,
GFP_KERNEL); GFP_KERNEL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
return;
}
}
/* If no lseg, fall back to read through mds */ /* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL) if (pgio->pg_lseg == NULL)
nfs_pageio_reset_read_mds(pgio); nfs_pageio_reset_read_mds(pgio);
...@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, ...@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo; struct nfs_commit_info cinfo;
int status; int status;
if (!pgio->pg_lseg) if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context, req->wb_context,
0, 0,
NFS4_MAX_UINT64, NFS4_MAX_UINT64,
IOMODE_RW, IOMODE_RW,
GFP_NOFS); GFP_NOFS);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
return;
}
}
/* If no lseg, fall back to write through mds */ /* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL) if (pgio->pg_lseg == NULL)
goto out_mds; goto out_mds;
......
This diff is collapsed.
...@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror { ...@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror {
struct nfs4_ff_layoutstat write_stat; struct nfs4_ff_layoutstat write_stat;
ktime_t start_time; ktime_t start_time;
ktime_t last_report_time; ktime_t last_report_time;
u32 report_interval;
}; };
struct nfs4_ff_layout_segment { struct nfs4_ff_layout_segment {
......
...@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, ...@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror, lseg->pls_range.offset, mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO, lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO); OP_ILLEGAL, GFP_NOIO);
if (fail_return) { if (!fail_return) {
pnfs_error_mark_layout_for_return(ino, lseg);
if (ff_layout_has_available_ds(lseg))
pnfs_set_retry_layoutget(lseg->pls_layout);
else
pnfs_clear_retry_layoutget(lseg->pls_layout);
} else {
if (ff_layout_has_available_ds(lseg)) if (ff_layout_has_available_ds(lseg))
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lseg->pls_layout->plh_flags); &lseg->pls_layout->plh_flags);
else { else
pnfs_error_mark_layout_for_return(ino, lseg);
} else
pnfs_error_mark_layout_for_return(ino, lseg); pnfs_error_mark_layout_for_return(ino, lseg);
pnfs_clear_retry_layoutget(lseg->pls_layout);
}
}
} }
out_update_creds: out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds)) if (ff_layout_update_mirror_cred(mirror, ds))
......
...@@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) ...@@ -71,19 +71,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid); return nfs_fileid_to_ino_t(fattr->fileid);
} }
/** static int nfs_wait_killable(int mode)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{ {
freezable_schedule_unsafe(); freezable_schedule_unsafe();
if (signal_pending_state(mode, current)) if (signal_pending_state(mode, current))
return -ERESTARTSYS; return -ERESTARTSYS;
return 0; return 0;
} }
/*
 * nfs_wait_bit_killable - sleep helper taking a wait_bit_key
 * @key: wait-bit key; not used by this function
 * @mode: task state mask, forwarded unchanged
 *
 * Thin wrapper that forwards @mode to nfs_wait_killable() and returns
 * its result.
 */
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
return nfs_wait_killable(mode);
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
/*
 * nfs_wait_atomic_killable - sleep helper taking an atomic_t
 * @p: the atomic counter being waited on; not used by this function
 *
 * Thin wrapper that calls nfs_wait_killable() with a fixed mode of
 * TASK_KILLABLE and returns its result.
 */
int nfs_wait_atomic_killable(atomic_t *p)
{
return nfs_wait_killable(TASK_KILLABLE);
}
/** /**
* nfs_compat_user_ino64 - returns the user-visible inode number * nfs_compat_user_ino64 - returns the user-visible inode number
* @fileid: 64-bit fileid * @fileid: 64-bit fileid
...@@ -700,7 +706,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) ...@@ -700,7 +706,7 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_owner = current->files;
l_ctx->lockowner.l_pid = current->tgid; l_ctx->lockowner.l_pid = current->tgid;
INIT_LIST_HEAD(&l_ctx->list); INIT_LIST_HEAD(&l_ctx->list);
nfs_iocounter_init(&l_ctx->io_count); atomic_set(&l_ctx->io_count, 0);
} }
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
...@@ -913,6 +919,12 @@ void nfs_file_clear_open_context(struct file *filp) ...@@ -913,6 +919,12 @@ void nfs_file_clear_open_context(struct file *filp)
if (ctx) { if (ctx) {
struct inode *inode = d_inode(ctx->dentry); struct inode *inode = d_inode(ctx->dentry);
/*
* A previous write hit a fatal error. Try to write back
* every page again.
*/
if (ctx->error < 0)
invalidate_inode_pages2(inode->i_mapping);
filp->private_data = NULL; filp->private_data = NULL;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
list_move_tail(&ctx->list, &NFS_I(inode)->open_files); list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
...@@ -1663,6 +1675,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1663,6 +1675,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long invalid = 0; unsigned long invalid = 0;
unsigned long now = jiffies; unsigned long now = jiffies;
unsigned long save_cache_validity; unsigned long save_cache_validity;
bool cache_revalidated = true;
dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino, __func__, inode->i_sb->s_id, inode->i_ino,
...@@ -1724,22 +1737,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1724,22 +1737,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_force_lookup_revalidate(inode); nfs_force_lookup_revalidate(inode);
inode->i_version = fattr->change_attr; inode->i_version = fattr->change_attr;
} }
} else } else {
nfsi->cache_validity |= save_cache_validity; nfsi->cache_validity |= save_cache_validity;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) { if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME) } else if (server->caps & NFS_CAP_MTIME) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) { if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME) } else if (server->caps & NFS_CAP_CTIME) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
/* Check if our cached file size is stale */ /* Check if our cached file size is stale */
if (fattr->valid & NFS_ATTR_FATTR_SIZE) { if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
...@@ -1759,19 +1778,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1759,19 +1778,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)cur_isize, (long long)cur_isize,
(long long)new_isize); (long long)new_isize);
} }
} else } else {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_ATIME) if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME) else if (server->caps & NFS_CAP_ATIME) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATIME (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_MODE) { if (fattr->valid & NFS_ATTR_FATTR_MODE) {
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
...@@ -1780,36 +1803,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1780,36 +1803,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_mode = newmode; inode->i_mode = newmode;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
} }
} else if (server->caps & NFS_CAP_MODE) } else if (server->caps & NFS_CAP_MODE) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
if (!uid_eq(inode->i_uid, fattr->uid)) { if (!uid_eq(inode->i_uid, fattr->uid)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_uid = fattr->uid; inode->i_uid = fattr->uid;
} }
} else if (server->caps & NFS_CAP_OWNER) } else if (server->caps & NFS_CAP_OWNER) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
if (!gid_eq(inode->i_gid, fattr->gid)) { if (!gid_eq(inode->i_gid, fattr->gid)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_gid = fattr->gid; inode->i_gid = fattr->gid;
} }
} else if (server->caps & NFS_CAP_OWNER_GROUP) } else if (server->caps & NFS_CAP_OWNER_GROUP) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL | NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
if (inode->i_nlink != fattr->nlink) { if (inode->i_nlink != fattr->nlink) {
...@@ -1818,19 +1847,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1818,19 +1847,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_DATA; invalid |= NFS_INO_INVALID_DATA;
set_nlink(inode, fattr->nlink); set_nlink(inode, fattr->nlink);
} }
} else if (server->caps & NFS_CAP_NLINK) } else if (server->caps & NFS_CAP_NLINK) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |= save_cache_validity &
(NFS_INO_INVALID_ATTR (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED); | NFS_INO_REVAL_FORCED);
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/* /*
* report the blocks in 512byte units * report the blocks in 512byte units
*/ */
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blocks = fattr->du.nfs2.blocks;
else
cache_revalidated = false;
/* Update attrtimeo value if we're out of the unstable period */ /* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) { if (invalid & NFS_INO_INVALID_ATTR) {
...@@ -1840,9 +1872,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1840,9 +1872,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* Set barrier to be more recent than all outstanding updates */ /* Set barrier to be more recent than all outstanding updates */
nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfsi->attr_gencount = nfs_inc_attr_generation_counter();
} else { } else {
if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if (cache_revalidated) {
if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) if (!time_in_range_open(now, nfsi->attrtimeo_timestamp,
nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
nfsi->attrtimeo <<= 1;
if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode))
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
}
nfsi->attrtimeo_timestamp = now; nfsi->attrtimeo_timestamp = now;
} }
/* Set the barrier to be more recent than this fattr */ /* Set the barrier to be more recent than this fattr */
...@@ -1851,7 +1887,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1851,7 +1887,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
} }
/* Don't declare attrcache up to date if there were no attrs! */ /* Don't declare attrcache up to date if there were no attrs! */
if (fattr->valid != 0) if (cache_revalidated)
invalid &= ~NFS_INO_INVALID_ATTR; invalid &= ~NFS_INO_INVALID_ATTR;
/* Don't invalidate the data if we were to blame */ /* Don't invalidate the data if we were to blame */
......
...@@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, ...@@ -238,7 +238,7 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr, struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr)); void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c); int nfs_iocounter_wait(struct nfs_lock_context *l_ctx);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops; extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
...@@ -252,18 +252,18 @@ void nfs_free_request(struct nfs_page *req); ...@@ -252,18 +252,18 @@ void nfs_free_request(struct nfs_page *req);
struct nfs_pgio_mirror * struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc);
/*
 * Initialise an nfs_io_counter: clear its flags word and set its
 * io_count atomic to zero.
 */
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
c->flags = 0;
atomic_set(&c->io_count, 0);
}
static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
{ {
WARN_ON_ONCE(desc->pg_mirror_count < 1); WARN_ON_ONCE(desc->pg_mirror_count < 1);
return desc->pg_mirror_count > 1; return desc->pg_mirror_count > 1;
} }
static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
const struct nfs_open_context *ctx2)
{
return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
}
/* nfs2xdr.c */ /* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[]; extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *, extern int nfs2_decode_dirent(struct xdr_stream *,
...@@ -380,6 +380,7 @@ extern void nfs_clear_inode(struct inode *); ...@@ -380,6 +380,7 @@ extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *); extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode); void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
extern int nfs_wait_atomic_killable(atomic_t *p);
/* super.c */ /* super.c */
extern const struct super_operations nfs_sops; extern const struct super_operations nfs_sops;
...@@ -519,7 +520,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode) ...@@ -519,7 +520,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
inode_dio_wait(inode); inode_dio_wait(inode);
} }
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
/* nfs4proc.c */ /* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
...@@ -696,9 +696,32 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) ...@@ -696,9 +696,32 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{ {
return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
} }
/*
 * Hash the "other" field of a stateid: run crc32_le() seeded with
 * 0xFFFFFFFF over NFS4_STATEID_OTHER_SIZE bytes and return the
 * bitwise complement of the result.
 */
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
	u32 crc = crc32_le(0xFFFFFFFF, &stateid->other[0],
			   NFS4_STATEID_OTHER_SIZE);

	return ~crc;
}
#else #else
static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
{ {
return 0; return 0;
} }
/*
 * Fallback for the #else branch: no hash is computed and 0 is always
 * returned.
 *
 * The parameter is const-qualified so that this stub has the same
 * signature as the crc32_le()-based variant; a caller passing a
 * const nfs4_stateid * then builds in both configurations.
 */
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
	return 0;
}
#endif #endif
/*
 * Tell whether @err is one of the error codes treated as fatal.
 *
 * Returns true for -ERESTARTSYS, -EIO, -ENOSPC, -EROFS and -E2BIG,
 * and false for any other value.
 */
static inline bool nfs_error_is_fatal(int err)
{
	return err == -ERESTARTSYS ||
	       err == -EIO ||
	       err == -ENOSPC ||
	       err == -EROFS ||
	       err == -E2BIG;
}
...@@ -204,6 +204,8 @@ static void ...@@ -204,6 +204,8 @@ static void
nfs42_layoutstat_done(struct rpc_task *task, void *calldata) nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
{ {
struct nfs42_layoutstat_data *data = calldata; struct nfs42_layoutstat_data *data = calldata;
struct inode *inode = data->inode;
struct pnfs_layout_hdr *lo;
if (!nfs4_sequence_done(task, &data->res.seq_res)) if (!nfs4_sequence_done(task, &data->res.seq_res))
return; return;
...@@ -211,12 +213,35 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) ...@@ -211,12 +213,35 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) { switch (task->tk_status) {
case 0: case 0:
break; break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
if (lo && nfs4_stateid_match(&data->args.stateid,
&lo->plh_stateid)) {
LIST_HEAD(head);
/*
* Mark the bad layout state as invalid, then retry
* with the current stateid.
*/
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
} else
spin_unlock(&inode->i_lock);
break;
case -ENOTSUPP: case -ENOTSUPP:
case -EOPNOTSUPP: case -EOPNOTSUPP:
NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
default: default:
dprintk("%s server returns %d\n", __func__, task->tk_status); break;
} }
dprintk("%s server returns %d\n", __func__, task->tk_status);
} }
static void static void
......
This diff is collapsed.
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include "nfs4idmap.h" #include "nfs4idmap.h"
#include "callback.h" #include "callback.h"
static const int nfs_set_port_min = 0; static const int nfs_set_port_min;
static const int nfs_set_port_max = 65535; static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table; static struct ctl_table_header *nfs4_callback_sysctl_table;
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "internal.h" #include "internal.h"
#include "nfs4session.h" #include "nfs4session.h"
#include "callback.h" #include "callback.h"
#include "pnfs.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "nfs4trace.h" #include "nfs4trace.h"
......
This diff is collapsed.
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
{ 1 << NFS_INO_COMMIT, "COMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
......
...@@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p) ...@@ -101,53 +101,18 @@ nfs_page_free(struct nfs_page *p)
kmem_cache_free(nfs_page_cachep, p); kmem_cache_free(nfs_page_cachep, p);
} }
static void
nfs_iocounter_inc(struct nfs_io_counter *c)
{
atomic_inc(&c->io_count);
}
/*
 * Subtract one from the io_count of @c. If that brings the count to
 * zero, clear NFS_IO_INPROGRESS in @c->flags and wake up anything
 * waiting on that bit.
 */
static void
nfs_iocounter_dec(struct nfs_io_counter *c)
{
	/* Nothing more to do while the count is still non-zero. */
	if (!atomic_dec_and_test(&c->io_count))
		return;

	clear_bit(NFS_IO_INPROGRESS, &c->flags);
	/* Barrier between clearing the bit and issuing the wake-up. */
	smp_mb__after_atomic();
	wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
}
/*
 * Sleep in TASK_KILLABLE state on the NFS_IO_INPROGRESS bit of @c->flags
 * until io_count is observed to be zero or the wait helper reports a
 * non-zero result.
 *
 * Returns the last value produced by nfs_wait_bit_killable(), or 0 when
 * the count was already zero on the first check and no wait happened.
 */
static int
__nfs_iocounter_wait(struct nfs_io_counter *c)
{
wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
int ret = 0;
do {
/* Queue ourselves first, then set the bit, and only then re-check the count. */
prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
/* Keep looping while I/O is still counted and the wait returned 0. */
} while (atomic_read(&c->io_count) != 0 && !ret);
finish_wait(wq, &q.wait);
return ret;
}
/** /**
* nfs_iocounter_wait - wait for i/o to complete * nfs_iocounter_wait - wait for i/o to complete
* @c: nfs_io_counter to use * @l_ctx: nfs_lock_context with io_counter to use
* *
* returns -ERESTARTSYS if interrupted by a fatal signal. * returns -ERESTARTSYS if interrupted by a fatal signal.
* Otherwise returns 0 once the io_count hits 0. * Otherwise returns 0 once the io_count hits 0.
*/ */
int int
nfs_iocounter_wait(struct nfs_io_counter *c) nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
{ {
if (atomic_read(&c->io_count) == 0) return wait_on_atomic_t(&l_ctx->io_count, nfs_wait_atomic_killable,
return 0; TASK_KILLABLE);
return __nfs_iocounter_wait(c);
} }
/* /*
...@@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, ...@@ -370,7 +335,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
return ERR_CAST(l_ctx); return ERR_CAST(l_ctx);
} }
req->wb_lock_context = l_ctx; req->wb_lock_context = l_ctx;
nfs_iocounter_inc(&l_ctx->io_count); atomic_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a /* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in * long write-back delay. This will be adjusted in
...@@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req) ...@@ -431,7 +396,8 @@ static void nfs_clear_request(struct nfs_page *req)
req->wb_page = NULL; req->wb_page = NULL;
} }
if (l_ctx != NULL) { if (l_ctx != NULL) {
nfs_iocounter_dec(&l_ctx->io_count); if (atomic_dec_and_test(&l_ctx->io_count))
wake_up_atomic_t(&l_ctx->io_count);
nfs_put_lock_context(l_ctx); nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL; req->wb_lock_context = NULL;
} }
...@@ -664,22 +630,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); ...@@ -664,22 +630,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
* @desc: IO descriptor * @desc: IO descriptor
* @hdr: pageio header * @hdr: pageio header
*/ */
static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_error(struct nfs_pgio_header *hdr)
struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_mirror *mirror;
u32 midx;
set_bit(NFS_IOHDR_REDO, &hdr->flags); set_bit(NFS_IOHDR_REDO, &hdr->flags);
nfs_pgio_data_destroy(hdr); nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr); hdr->completion_ops->completion(hdr);
/* TODO: Make sure it's right to clean up all mirrors here
* and not just hdr->pgio_mirror_idx */
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
mirror = &desc->pg_mirrors[midx];
desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
}
return -ENOMEM;
} }
/** /**
...@@ -800,8 +755,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, ...@@ -800,8 +755,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
unsigned int pagecount, pageused; unsigned int pagecount, pageused;
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
if (!nfs_pgarray_set(&hdr->page_array, pagecount)) if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
return nfs_pgio_error(desc, hdr); nfs_pgio_error(hdr);
desc->pg_error = -ENOMEM;
return desc->pg_error;
}
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = hdr->page_array.pagevec; pages = hdr->page_array.pagevec;
...@@ -819,8 +777,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, ...@@ -819,8 +777,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
*pages++ = last_page = req->wb_page; *pages++ = last_page = req->wb_page;
} }
} }
if (WARN_ON_ONCE(pageused != pagecount)) if (WARN_ON_ONCE(pageused != pagecount)) {
return nfs_pgio_error(desc, hdr); nfs_pgio_error(hdr);
desc->pg_error = -EINVAL;
return desc->pg_error;
}
if ((desc->pg_ioflags & FLUSH_COND_STABLE) && if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
...@@ -835,18 +796,13 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); ...@@ -835,18 +796,13 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{ {
struct nfs_pgio_mirror *mirror;
struct nfs_pgio_header *hdr; struct nfs_pgio_header *hdr;
int ret; int ret;
mirror = nfs_pgio_current_mirror(desc);
hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) { if (!hdr) {
/* TODO: make sure this is right with mirroring - or desc->pg_error = -ENOMEM;
* should it back out all mirrors? */ return desc->pg_error;
desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
return -ENOMEM;
} }
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr); ret = nfs_generic_pgio(desc, hdr);
...@@ -874,6 +830,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, ...@@ -874,6 +830,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
if (pgio->pg_error < 0)
return pgio->pg_error;
if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
return -EINVAL; return -EINVAL;
...@@ -903,12 +862,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) ...@@ -903,12 +862,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
pgio->pg_mirrors_dynamic = NULL; pgio->pg_mirrors_dynamic = NULL;
} }
/*
 * Compare two open contexts: they match only if both the credential
 * pointer and the state pointer are identical.
 */
static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
		const struct nfs_open_context *ctx2)
{
	bool same_cred = ctx1->cred == ctx2->cred;
	bool same_state = ctx1->state == ctx2->state;

	return same_cred && same_state;
}
static bool nfs_match_lock_context(const struct nfs_lock_context *l1, static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
const struct nfs_lock_context *l2) const struct nfs_lock_context *l2)
{ {
...@@ -982,6 +935,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, ...@@ -982,6 +935,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
} else { } else {
if (desc->pg_ops->pg_init) if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req); desc->pg_ops->pg_init(desc, req);
if (desc->pg_error < 0)
return 0;
mirror->pg_base = req->wb_pgbase; mirror->pg_base = req->wb_pgbase;
} }
if (!nfs_can_coalesce_requests(prev, req, desc)) if (!nfs_can_coalesce_requests(prev, req, desc))
...@@ -1147,6 +1102,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, ...@@ -1147,6 +1102,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
bytes = req->wb_bytes; bytes = req->wb_bytes;
nfs_pageio_setup_mirroring(desc, req); nfs_pageio_setup_mirroring(desc, req);
if (desc->pg_error < 0)
goto out_failed;
for (midx = 0; midx < desc->pg_mirror_count; midx++) { for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) { if (midx) {
...@@ -1163,7 +1120,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, ...@@ -1163,7 +1120,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (IS_ERR(dupreq)) { if (IS_ERR(dupreq)) {
nfs_page_group_unlock(req); nfs_page_group_unlock(req);
return 0; desc->pg_error = PTR_ERR(dupreq);
goto out_failed;
} }
nfs_lock_request(dupreq); nfs_lock_request(dupreq);
...@@ -1176,10 +1134,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, ...@@ -1176,10 +1134,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (nfs_pgio_has_mirroring(desc)) if (nfs_pgio_has_mirroring(desc))
desc->pg_mirror_idx = midx; desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq)) if (!nfs_pageio_add_request_mirror(desc, dupreq))
return 0; goto out_failed;
} }
return 1; return 1;
out_failed:
/*
* We might have failed before sending any reqs over wire.
* Clean up rest of the reqs in mirror pg_list.
*/
if (desc->pg_error) {
struct nfs_pgio_mirror *mirror;
void (*func)(struct list_head *);
/* remember fatal errors */
if (nfs_error_is_fatal(desc->pg_error))
mapping_set_error(desc->pg_inode->i_mapping,
desc->pg_error);
func = desc->pg_completion_ops->error_cleanup;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
mirror = &desc->pg_mirrors[midx];
func(&mirror->pg_list);
}
}
return 0;
} }
/* /*
...@@ -1232,7 +1212,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, ...@@ -1232,7 +1212,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
nfs_pageio_complete(desc); nfs_pageio_complete(desc);
if (!list_empty(&failed)) { if (!list_empty(&failed)) {
list_move(&failed, &hdr->pages); list_move(&failed, &hdr->pages);
return -EIO; return desc->pg_error < 0 ? desc->pg_error : -EIO;
} }
return 0; return 0;
} }
......
This diff is collapsed.
...@@ -98,7 +98,6 @@ enum { ...@@ -98,7 +98,6 @@ enum {
NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */
}; };
enum layoutdriver_policy_flags { enum layoutdriver_policy_flags {
...@@ -261,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, ...@@ -261,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
bool update_barrier); bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo, struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range, const struct pnfs_layout_range *range,
struct nfs4_state *open_state); struct nfs4_state *open_state);
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list, struct list_head *tmp_list,
struct pnfs_layout_range *recall_range); const struct pnfs_layout_range *recall_range);
int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
const struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino); bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino); void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
...@@ -379,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) ...@@ -379,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d)
return d; return d;
} }
/*
 * Set NFS_LAYOUT_RETRY_LAYOUTGET on @lo. The layout header refcount is
 * bumped only on the transition from clear to set.
 */
static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	/* Bit was already set: the reference was taken earlier. */
	if (test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
		return;

	atomic_inc(&lo->plh_refcount);
}
/*
 * Clear NFS_LAYOUT_RETRY_LAYOUTGET on @lo. If the bit was set, decrement
 * the layout header refcount and wake waiters on NFS_LAYOUT_RETURN.
 */
static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	/* Bit was not set: nothing to undo. */
	if (!test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
		return;

	atomic_dec(&lo->plh_refcount);
	/* LAYOUTRETURN is no longer needed, so release anyone waiting for it */
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
}
/* Report whether NFS_LAYOUT_RETRY_LAYOUTGET is currently set on @lo. */
static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	if (test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
		return true;
	return false;
}
static inline struct pnfs_layout_segment * static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg) pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{ {
...@@ -409,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) ...@@ -409,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
return lseg; return lseg;
} }
/* Report whether the NFS_LSEG_VALID flag is set on @lseg. */
static inline bool
pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
{
	return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) ? true : false;
}
/* Return true if a layout driver is being used for this mountpoint */ /* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss) static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{ {
...@@ -556,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end) ...@@ -556,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end)
return 1 + end - offset; return 1 + end - offset;
} }
/**
 * pnfs_mark_layout_returned_if_empty - flag an empty layout as returned
 * @lo: layout header
 *
 * Sets NFS_LAYOUT_INVALID_STID on @lo when its segment list is empty;
 * does nothing otherwise.
 *
 * Note: Caller must hold inode->i_lock
 */
static inline void
pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
{
	/* Segments are still present: leave the flags untouched. */
	if (!list_empty(&lo->plh_segs))
		return;

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
}
/*
 * Copy the whole layout range structure from @src into @dst,
 * byte for byte.
 */
static inline void
pnfs_copy_range(struct pnfs_layout_range *dst,
		const struct pnfs_layout_range *src)
{
	const size_t len = sizeof(*dst);

	memcpy(dst, src, len);
}
extern unsigned int layoutstats_timer; extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG #ifdef NFS_DEBUG
......
...@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, ...@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
} else { } else {
nfs_retry_commit(mds_pages, NULL, cinfo, 0); nfs_retry_commit(mds_pages, NULL, cinfo, 0);
pnfs_generic_retry_commit(cinfo, 0); pnfs_generic_retry_commit(cinfo, 0);
cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM; return -ENOMEM;
} }
} }
nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
if (nreq == 0) { if (nreq == 0)
cinfo->completion_ops->error_cleanup(NFS_I(inode));
goto out; goto out;
}
atomic_add(nreq, &cinfo->mds->rpcs_out); atomic_add(nreq, &cinfo->mds->rpcs_out);
...@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, ...@@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
buckets = cinfo->ds->buckets; buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written; list = &buckets[ds_commit_idx].written;
if (list_empty(list)) { if (list_empty(list)) {
if (!pnfs_is_valid_lseg(lseg)) {
spin_unlock(cinfo->lock);
cinfo->completion_ops->resched_write(cinfo, req);
return;
}
/* Non-empty buckets hold a reference on the lseg. That ref /* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released * is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled * there. It could also be released if the last req is pulled
......
...@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) ...@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
} }
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
/*
 * Finish a read request: log completion, and if
 * nfs_page_group_sync_on_bit() returns true for PG_UNLOCKPAGE, hand an
 * up-to-date page to fscache and unlock it. The request is released in
 * every case.
 */
static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *inode = d_inode(req->wb_context->dentry);

	dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
		(long long)req_offset(req));

	if (!nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
		goto out_release;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(inode, req->wb_page, 0);
	unlock_page(req->wb_page);

out_release:
	nfs_release_request(req);
}
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page) struct page *page)
{ {
...@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, ...@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
nfs_pageio_init_read(&pgio, inode, false, nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops); &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new); if (!nfs_pageio_add_request(&pgio, new)) {
nfs_list_remove_request(new);
nfs_readpage_release(new);
}
nfs_pageio_complete(&pgio); nfs_pageio_complete(&pgio);
/* It doesn't make sense to do mirrored reads! */ /* It doesn't make sense to do mirrored reads! */
...@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, ...@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
pgm = &pgio.pg_mirrors[0]; pgm = &pgio.pg_mirrors[0];
NFS_I(inode)->read_io += pgm->pg_bytes_written; NFS_I(inode)->read_io += pgm->pg_bytes_written;
return 0; return pgio.pg_error < 0 ? pgio.pg_error : 0;
}
/*
 * Complete a read request. After logging, the page is unlocked (and
 * passed to fscache when up to date) only if
 * nfs_page_group_sync_on_bit() returns true for PG_UNLOCKPAGE; the
 * request itself is always released.
 */
static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *inode = d_inode(req->wb_context->dentry);
	bool do_unlock;

	dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
		(long long)req_offset(req));

	do_unlock = nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE);
	if (do_unlock) {
		if (PageUptodate(req->wb_page))
			nfs_readpage_to_fscache(inode, req->wb_page, 0);
		unlock_page(req->wb_page);
	}

	nfs_release_request(req);
}
static void nfs_page_group_set_uptodate(struct nfs_page *req) static void nfs_page_group_set_uptodate(struct nfs_page *req)
...@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page) ...@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page)
if (len < PAGE_CACHE_SIZE) if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE); zero_user_segment(page, len, PAGE_CACHE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) { if (!nfs_pageio_add_request(desc->pgio, new)) {
nfs_list_remove_request(new);
nfs_readpage_release(new);
error = desc->pgio->pg_error; error = desc->pgio->pg_error;
goto out_unlock; goto out_unlock;
} }
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/freezer.h>
#include <linux/wait.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -244,11 +246,9 @@ static int wb_priority(struct writeback_control *wbc) ...@@ -244,11 +246,9 @@ static int wb_priority(struct writeback_control *wbc)
{ {
int ret = 0; int ret = 0;
if (wbc->for_reclaim) if (wbc->for_reclaim)
return FLUSH_HIGHPRI | FLUSH_STABLE; return FLUSH_HIGHPRI | FLUSH_COND_STABLE;
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
ret = FLUSH_COND_STABLE; ret = FLUSH_COND_STABLE;
if (wbc->for_kupdate || wbc->for_background)
ret |= FLUSH_LOWPRI;
return ret; return ret;
} }
...@@ -545,12 +545,22 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock) ...@@ -545,12 +545,22 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock)
return head; return head;
} }
/*
 * Discard a write request after a fatal error: unlock the request, end
 * writeback on its page, remove the page from its mapping with
 * generic_error_remove_page(), and then drop the request reference.
 *
 * The reference is dropped last on purpose. req->wb_page is still read
 * when removing the page from the mapping, so calling
 * nfs_release_request() before that point could free @req (if this was
 * the final reference) and turn those reads into a use-after-free.
 */
static void nfs_write_error_remove_page(struct nfs_page *req)
{
	nfs_unlock_request(req);
	nfs_end_page_writeback(req);
	generic_error_remove_page(page_file_mapping(req->wb_page),
				  req->wb_page);
	nfs_release_request(req);
}
/* /*
* Find an associated nfs write request, and prepare to flush it out * Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request(). * May return an error if the user signalled nfs_wait_on_request().
*/ */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct page *page, bool nonblock) struct page *page, bool nonblock,
bool launder)
{ {
struct nfs_page *req; struct nfs_page *req;
int ret = 0; int ret = 0;
...@@ -567,8 +577,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, ...@@ -567,8 +577,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
ret = 0; ret = 0;
if (!nfs_pageio_add_request(pgio, req)) { if (!nfs_pageio_add_request(pgio, req)) {
nfs_redirty_request(req);
ret = pgio->pg_error; ret = pgio->pg_error;
/*
* Remove the problematic req upon fatal errors
* in launder case, while other dirty pages can
* still be around until they get flushed.
*/
if (nfs_error_is_fatal(ret)) {
nfs_context_set_write_error(req->wb_context, ret);
if (launder) {
nfs_write_error_remove_page(req);
goto out;
}
}
nfs_redirty_request(req);
ret = -EAGAIN;
} else } else
nfs_add_stats(page_file_mapping(page)->host, nfs_add_stats(page_file_mapping(page)->host,
NFSIOS_WRITEPAGES, 1); NFSIOS_WRITEPAGES, 1);
...@@ -576,12 +599,14 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, ...@@ -576,12 +599,14 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
return ret; return ret;
} }
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio, bool launder)
{ {
int ret; int ret;
nfs_pageio_cond_complete(pgio, page_file_index(page)); nfs_pageio_cond_complete(pgio, page_file_index(page));
ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
launder);
if (ret == -EAGAIN) { if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page); redirty_page_for_writepage(wbc, page);
ret = 0; ret = 0;
...@@ -592,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st ...@@ -592,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
/* /*
* Write an mmapped page to the server. * Write an mmapped page to the server.
*/ */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) static int nfs_writepage_locked(struct page *page,
struct writeback_control *wbc,
bool launder)
{ {
struct nfs_pageio_descriptor pgio; struct nfs_pageio_descriptor pgio;
struct inode *inode = page_file_mapping(page)->host; struct inode *inode = page_file_mapping(page)->host;
...@@ -601,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc ...@@ -601,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
false, &nfs_async_write_completion_ops); false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio); err = nfs_do_writepage(page, wbc, &pgio, launder);
nfs_pageio_complete(&pgio); nfs_pageio_complete(&pgio);
if (err < 0) if (err < 0)
return err; return err;
...@@ -614,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) ...@@ -614,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
{ {
int ret; int ret;
ret = nfs_writepage_locked(page, wbc); ret = nfs_writepage_locked(page, wbc, false);
unlock_page(page); unlock_page(page);
return ret; return ret;
} }
...@@ -623,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * ...@@ -623,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
{ {
int ret; int ret;
ret = nfs_do_writepage(page, wbc, data); ret = nfs_do_writepage(page, wbc, data, false);
unlock_page(page); unlock_page(page);
return ret; return ret;
} }
...@@ -1128,7 +1155,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) ...@@ -1128,7 +1155,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
if (req == NULL) if (req == NULL)
return 0; return 0;
l_ctx = req->wb_lock_context; l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx; do_flush = req->wb_page != page ||
!nfs_match_open_context(req->wb_context, ctx);
/* for now, flush if more than 1 request in page_group */ /* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req; do_flush |= req->wb_this_page != req;
if (l_ctx && flctx && if (l_ctx && flctx &&
...@@ -1326,9 +1354,15 @@ static void nfs_async_write_error(struct list_head *head) ...@@ -1326,9 +1354,15 @@ static void nfs_async_write_error(struct list_head *head)
} }
} }
/*
 * reschedule_io callback for async writes: pass the header's page list
 * to nfs_async_write_error().
 */
static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	struct list_head *pages = &hdr->pages;

	nfs_async_write_error(pages);
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.error_cleanup = nfs_async_write_error, .error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion, .completion = nfs_write_completion,
.reschedule_io = nfs_async_write_reschedule_io,
}; };
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
...@@ -1529,27 +1563,21 @@ static void nfs_writeback_result(struct rpc_task *task, ...@@ -1529,27 +1563,21 @@ static void nfs_writeback_result(struct rpc_task *task,
} }
} }
static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{ {
int ret; return wait_on_atomic_t(&cinfo->rpcs_out,
nfs_wait_atomic_killable, TASK_KILLABLE);
}
if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
return 1; {
if (!may_wait) atomic_inc(&cinfo->rpcs_out);
return 0;
ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
NFS_INO_COMMIT,
nfs_wait_bit_killable,
TASK_KILLABLE);
return (ret < 0) ? ret : 1;
} }
static void nfs_commit_clear_lock(struct nfs_inode *nfsi) static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{ {
clear_bit(NFS_INO_COMMIT, &nfsi->flags); if (atomic_dec_and_test(&cinfo->rpcs_out))
smp_mb__after_atomic(); wake_up_atomic_t(&cinfo->rpcs_out);
wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
} }
void nfs_commitdata_release(struct nfs_commit_data *data) void nfs_commitdata_release(struct nfs_commit_data *data)
...@@ -1666,6 +1694,13 @@ void nfs_retry_commit(struct list_head *page_list, ...@@ -1666,6 +1694,13 @@ void nfs_retry_commit(struct list_head *page_list,
} }
EXPORT_SYMBOL_GPL(nfs_retry_commit); EXPORT_SYMBOL_GPL(nfs_retry_commit);
/*
 * resched_write callback for commits: mark the request's page dirty
 * again via __set_page_dirty_nobuffers(). @cinfo is not used here.
 */
static void
nfs_commit_resched_write(struct nfs_commit_info *cinfo,
		struct nfs_page *req)
{
	struct page *page = req->wb_page;

	__set_page_dirty_nobuffers(page);
}
/* /*
* Commit dirty pages * Commit dirty pages
*/ */
...@@ -1687,7 +1722,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, ...@@ -1687,7 +1722,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
data->mds_ops, how, 0); data->mds_ops, how, 0);
out_bad: out_bad:
nfs_retry_commit(head, NULL, cinfo, 0); nfs_retry_commit(head, NULL, cinfo, 0);
cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM; return -ENOMEM;
} }
...@@ -1749,8 +1783,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) ...@@ -1749,8 +1783,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_init_cinfo(&cinfo, data->inode, data->dreq);
if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) nfs_commit_end(cinfo.mds);
nfs_commit_clear_lock(NFS_I(data->inode));
} }
static void nfs_commit_release(void *calldata) static void nfs_commit_release(void *calldata)
...@@ -1769,7 +1802,7 @@ static const struct rpc_call_ops nfs_commit_ops = { ...@@ -1769,7 +1802,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages, .completion = nfs_commit_release_pages,
.error_cleanup = nfs_commit_clear_lock, .resched_write = nfs_commit_resched_write,
}; };
int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
...@@ -1788,30 +1821,25 @@ int nfs_commit_inode(struct inode *inode, int how) ...@@ -1788,30 +1821,25 @@ int nfs_commit_inode(struct inode *inode, int how)
LIST_HEAD(head); LIST_HEAD(head);
struct nfs_commit_info cinfo; struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC; int may_wait = how & FLUSH_SYNC;
int error = 0;
int res; int res;
res = nfs_commit_set_lock(NFS_I(inode), may_wait);
if (res <= 0)
goto out_mark_dirty;
nfs_init_cinfo_from_inode(&cinfo, inode); nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
res = nfs_scan_commit(inode, &head, &cinfo); res = nfs_scan_commit(inode, &head, &cinfo);
if (res) { if (res)
int error;
error = nfs_generic_commit_list(inode, &head, how, &cinfo); error = nfs_generic_commit_list(inode, &head, how, &cinfo);
nfs_commit_end(cinfo.mds);
if (error < 0) if (error < 0)
return error; goto out_error;
if (!may_wait) if (!may_wait)
goto out_mark_dirty; goto out_mark_dirty;
error = wait_on_bit_action(&NFS_I(inode)->flags, error = wait_on_commit(cinfo.mds);
NFS_INO_COMMIT,
nfs_wait_bit_killable,
TASK_KILLABLE);
if (error < 0) if (error < 0)
return error; return error;
} else
nfs_commit_clear_lock(NFS_I(inode));
return res; return res;
out_error:
res = error;
/* Note: If we exit without ensuring that the commit is complete, /* Note: If we exit without ensuring that the commit is complete,
* we must mark the inode as dirty. Otherwise, future calls to * we must mark the inode as dirty. Otherwise, future calls to
* sync_inode() with the WB_SYNC_ALL flag set will fail to ensure * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
...@@ -1821,6 +1849,7 @@ int nfs_commit_inode(struct inode *inode, int how) ...@@ -1821,6 +1849,7 @@ int nfs_commit_inode(struct inode *inode, int how)
__mark_inode_dirty(inode, I_DIRTY_DATASYNC); __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return res; return res;
} }
EXPORT_SYMBOL_GPL(nfs_commit_inode);
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{ {
...@@ -1911,7 +1940,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ...@@ -1911,7 +1940,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
/* /*
* Write back all requests on one page - we do this before reading it. * Write back all requests on one page - we do this before reading it.
*/ */
int nfs_wb_page(struct inode *inode, struct page *page) int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
{ {
loff_t range_start = page_file_offset(page); loff_t range_start = page_file_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
...@@ -1928,7 +1957,7 @@ int nfs_wb_page(struct inode *inode, struct page *page) ...@@ -1928,7 +1957,7 @@ int nfs_wb_page(struct inode *inode, struct page *page)
for (;;) { for (;;) {
wait_on_page_writeback(page); wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) { if (clear_page_dirty_for_io(page)) {
ret = nfs_writepage_locked(page, &wbc); ret = nfs_writepage_locked(page, &wbc, launder);
if (ret < 0) if (ret < 0)
goto out_error; goto out_error;
continue; continue;
......
...@@ -592,4 +592,18 @@ enum data_content4 { ...@@ -592,4 +592,18 @@ enum data_content4 {
NFS4_CONTENT_HOLE = 1, NFS4_CONTENT_HOLE = 1,
}; };
/*
 * Reason codes, numbered contiguously from 0.
 * NOTE(review): presumably reported by the pNFS layout update path;
 * confirm against the users of this enum.
 */
enum pnfs_update_layout_reason {
	PNFS_UPDATE_LAYOUT_UNKNOWN		= 0,
	PNFS_UPDATE_LAYOUT_NO_PNFS		= 1,
	PNFS_UPDATE_LAYOUT_RD_ZEROLEN		= 2,
	PNFS_UPDATE_LAYOUT_MDSTHRESH		= 3,
	PNFS_UPDATE_LAYOUT_NOMEM		= 4,
	PNFS_UPDATE_LAYOUT_BULK_RECALL		= 5,
	PNFS_UPDATE_LAYOUT_IO_TEST_FAIL		= 6,
	PNFS_UPDATE_LAYOUT_FOUND_CACHED		= 7,
	PNFS_UPDATE_LAYOUT_RETURN		= 8,
	PNFS_UPDATE_LAYOUT_BLOCKED		= 9,
	PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET	= 10,
};
#endif #endif
...@@ -60,18 +60,12 @@ struct nfs_lockowner { ...@@ -60,18 +60,12 @@ struct nfs_lockowner {
pid_t l_pid; pid_t l_pid;
}; };
#define NFS_IO_INPROGRESS 0
struct nfs_io_counter {
unsigned long flags;
atomic_t io_count;
};
struct nfs_lock_context { struct nfs_lock_context {
atomic_t count; atomic_t count;
struct list_head list; struct list_head list;
struct nfs_open_context *open_context; struct nfs_open_context *open_context;
struct nfs_lockowner lockowner; struct nfs_lockowner lockowner;
struct nfs_io_counter io_count; atomic_t io_count;
}; };
struct nfs4_state; struct nfs4_state;
...@@ -216,7 +210,6 @@ struct nfs_inode { ...@@ -216,7 +210,6 @@ struct nfs_inode {
#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FLUSHING (4) /* inode is flushing out data */
#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
...@@ -518,12 +511,24 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned ...@@ -518,12 +511,24 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
*/ */
extern int nfs_sync_inode(struct inode *inode); extern int nfs_sync_inode(struct inode *inode);
extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_all(struct inode *inode);
extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
extern int nfs_commit_inode(struct inode *, int); extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern struct nfs_commit_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_commit_data *data); extern void nfs_commit_free(struct nfs_commit_data *data);
/*
 * Write back a single page with the "launder" flag set.
 *
 * Returns whatever nfs_wb_single_page() returns.
 */
static inline int
nfs_wb_launder_page(struct inode *inode, struct page *page)
{
	const bool launder = true;

	return nfs_wb_single_page(inode, page, launder);
}
/*
 * Write back a single page with the "launder" flag clear.
 *
 * Returns whatever nfs_wb_single_page() returns.
 */
static inline int
nfs_wb_page(struct inode *inode, struct page *page)
{
	const bool launder = false;

	return nfs_wb_single_page(inode, page, launder);
}
static inline int static inline int
nfs_have_writebacks(struct inode *inode) nfs_have_writebacks(struct inode *inode)
{ {
......
...@@ -102,6 +102,7 @@ struct nfs_client { ...@@ -102,6 +102,7 @@ struct nfs_client {
#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ #define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */
#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */
#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */
#define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */
#endif /* CONFIG_NFS_V4 */ #endif /* CONFIG_NFS_V4 */
/* Our own IP address, as a null-terminated string. /* Our own IP address, as a null-terminated string.
......
...@@ -1375,6 +1375,7 @@ enum { ...@@ -1375,6 +1375,7 @@ enum {
NFS_IOHDR_ERROR = 0, NFS_IOHDR_ERROR = 0,
NFS_IOHDR_EOF, NFS_IOHDR_EOF,
NFS_IOHDR_REDO, NFS_IOHDR_REDO,
NFS_IOHDR_STAT,
}; };
struct nfs_pgio_header { struct nfs_pgio_header {
...@@ -1420,11 +1421,12 @@ struct nfs_mds_commit_info { ...@@ -1420,11 +1421,12 @@ struct nfs_mds_commit_info {
struct list_head list; struct list_head list;
}; };
struct nfs_commit_info;
struct nfs_commit_data; struct nfs_commit_data;
struct nfs_inode; struct nfs_inode;
struct nfs_commit_completion_ops { struct nfs_commit_completion_ops {
void (*error_cleanup) (struct nfs_inode *nfsi);
void (*completion) (struct nfs_commit_data *data); void (*completion) (struct nfs_commit_data *data);
void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
}; };
struct nfs_commit_info { struct nfs_commit_info {
...@@ -1454,12 +1456,14 @@ struct nfs_commit_data { ...@@ -1454,12 +1456,14 @@ struct nfs_commit_data {
const struct rpc_call_ops *mds_ops; const struct rpc_call_ops *mds_ops;
const struct nfs_commit_completion_ops *completion_ops; const struct nfs_commit_completion_ops *completion_ops;
int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
unsigned long flags;
}; };
struct nfs_pgio_completion_ops { struct nfs_pgio_completion_ops {
void (*error_cleanup)(struct list_head *head); void (*error_cleanup)(struct list_head *head);
void (*init_hdr)(struct nfs_pgio_header *hdr); void (*init_hdr)(struct nfs_pgio_header *hdr);
void (*completion)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr);
void (*reschedule_io)(struct nfs_pgio_header *hdr);
}; };
struct nfs_unlinkdata { struct nfs_unlinkdata {
......
...@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen) ...@@ -1217,6 +1217,7 @@ static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
return -EINVAL; return -EINVAL;
memcpy(buf, &rpc_in6addr_loopback, memcpy(buf, &rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback)); sizeof(rpc_in6addr_loopback));
break;
default: default:
dprintk("RPC: %s: address family not supported\n", dprintk("RPC: %s: address family not supported\n",
__func__); __func__);
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# define RPCDBG_FACILITY RPCDBG_TRANS # define RPCDBG_FACILITY RPCDBG_TRANS
#endif #endif
#define RPCRDMA_BACKCHANNEL_DEBUG #undef RPCRDMA_BACKCHANNEL_DEBUG
static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst) struct rpc_rqst *rqst)
...@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
size_t size; size_t size;
req = rpcrdma_create_req(r_xprt); req = rpcrdma_create_req(r_xprt);
if (!req) if (IS_ERR(req))
return -ENOMEM; return PTR_ERR(req);
req->rl_backchannel = true; req->rl_backchannel = true;
size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
...@@ -84,9 +84,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -84,9 +84,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count) unsigned int count)
{ {
struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
unsigned long flags;
int rc = 0; int rc = 0;
while (count--) { while (count--) {
...@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, ...@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
break; break;
} }
spin_lock_irqsave(&buffers->rb_lock, flags); rpcrdma_recv_buffer_put(rep);
list_add(&rep->rr_list, &buffers->rb_recv_bufs);
spin_unlock_irqrestore(&buffers->rb_lock, flags);
} }
return rc; return rc;
...@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) ...@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
__func__); __func__);
goto out_free; goto out_free;
} }
dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
rqst->rq_xprt = &r_xprt->rx_xprt; rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list); INIT_LIST_HEAD(&rqst->rq_list);
...@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) ...@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpclen = rqst->rq_svec[0].iov_len; rpclen = rqst->rq_svec[0].iov_len;
#ifdef RPCRDMA_BACKCHANNEL_DEBUG
pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
__func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
pr_info("RPC: %s: RPC/RDMA: %*ph\n", pr_info("RPC: %s: RPC/RDMA: %*ph\n",
__func__, (int)RPCRDMA_HDRLEN_MIN, headerp); __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
pr_info("RPC: %s: RPC: %*ph\n", pr_info("RPC: %s: RPC: %*ph\n",
__func__, (int)rpclen, rqst->rq_svec[0].iov_base); __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
#endif
req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
...@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) ...@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{ {
struct rpc_xprt *xprt = rqst->rq_xprt; struct rpc_xprt *xprt = rqst->rq_xprt;
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
smp_mb__before_atomic(); smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
...@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list); struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list); list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock); spin_unlock(&xprt->bc_pa_lock);
#ifdef RPCRDMA_BACKCHANNEL_DEBUG dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
#endif
/* Prepare rqst */ /* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0; rqst->rq_reply_bytes_recvd = 0;
...@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* direction reply. * direction reply.
*/ */
req = rpcr_to_rdmar(rqst); req = rpcr_to_rdmar(rqst);
#ifdef RPCRDMA_BACKCHANNEL_DEBUG dprintk("RPC: %s: attaching rep %p to req %p\n",
pr_info("RPC: %s: attaching rep %p to req %p\n",
__func__, rep, req); __func__, rep, req);
#endif
req->rl_reply = rep; req->rl_reply = rep;
/* Defeat the retransmit detection logic in send_request */ /* Defeat the retransmit detection logic in send_request */
......
...@@ -179,6 +179,69 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -179,6 +179,69 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return rc; return rc;
} }
/*
 * DMA unmap the run of segments that starts at @seg and hand the
 * associated MW back via rpcrdma_put_mw().
 *
 * The segment count is taken from seg->mr_nsegs and seg->rl_mw is
 * cleared before the segments are unmapped one at a time.
 */
static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	int remaining = seg->mr_nsegs;

	seg->rl_mw = NULL;

	for (; remaining; remaining--) {
		rpcrdma_unmap_one(device, seg);
		seg++;
	}

	rpcrdma_put_mw(r_xprt, mw);
}
/* Invalidate every memory region registered for "req".
 *
 * Runs in two passes over the req's chunks: the first collects each
 * chunk's FMR on a local list and invalidates them with one
 * ib_unmap_fmr() call; the second DMA unmaps each chunk and clears
 * its segment count. req->rl_nchunks is zero on return.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *cur;
	unsigned int idx, left;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so issue it once for the whole
	 * list rather than once per mapped MR.
	 */
	idx = 0;
	left = req->rl_nchunks;
	while (left) {
		cur = &req->rl_segments[idx];
		list_add(&cur->rl_mw->r.fmr.fmr->list, &unmap_list);
		idx += cur->mr_nsegs;
		left--;
	}

	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	idx = 0;
	left = req->rl_nchunks;
	while (left) {
		cur = &req->rl_segments[idx];
		__fmr_dma_unmap(r_xprt, cur);
		/* advance by the chunk's length before it is zeroed */
		idx += cur->mr_nsegs;
		cur->mr_nsegs = 0;
		left--;
	}

	req->rl_nchunks = 0;
}
/* Use the ib_unmap_fmr() verb to prevent further remote /* Use the ib_unmap_fmr() verb to prevent further remote
* access via RDMA READ or RDMA WRITE. * access via RDMA READ or RDMA WRITE.
*/ */
...@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) ...@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map, .ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap = fmr_op_unmap, .ro_unmap = fmr_op_unmap,
.ro_open = fmr_op_open, .ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages, .ro_maxpages = fmr_op_maxpages,
......
...@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
} }
/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
* to be reset.
*
* WARNING: Only wr_id and status are reliable at this point
*/
static void static void
frwr_sendcompletion(struct ib_wc *wc) __frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
{ {
struct rpcrdma_mw *r;
if (likely(wc->status == IB_WC_SUCCESS)) if (likely(wc->status == IB_WC_SUCCESS))
return; return;
...@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc) ...@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
else else
pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status); __func__, r, ib_wc_status_msg(wc->status), wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE; r->r.frmr.fr_state = FRMR_IS_STALE;
} }
/*
 * Send completion handler for FRWR work requests.
 *
 * The MW is recovered from wc->wr_id. A non-successful status is
 * passed to __frwr_sendcompletion_flush(); afterwards, if the frmr
 * has fr_waiter set, fr_linv_done is completed.
 */
static void
frwr_sendcompletion(struct ib_wc *wc)
{
	struct rpcrdma_mw *mw = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
	bool failed = wc->status != IB_WC_SUCCESS;

	if (unlikely(failed))
		__frwr_sendcompletion_flush(wc, mw);

	if (!mw->r.frmr.fr_waiter)
		return;
	complete(&mw->r.frmr.fr_linv_done);
}
static int static int
frwr_op_init(struct rpcrdma_xprt *r_xprt) frwr_op_init(struct rpcrdma_xprt *r_xprt)
{ {
...@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw; struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr; struct rpcrdma_frmr *frmr;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_reg_wr reg_wr; struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr; struct ib_send_wr *bad_wr;
int rc, i, n, dma_nents; int rc, i, n, dma_nents;
u8 key; u8 key;
...@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID); } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr; frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID; frmr->fr_state = FRMR_IS_VALID;
frmr->fr_waiter = false;
mr = frmr->fr_mr; mr = frmr->fr_mr;
reg_wr = &frmr->fr_regwr;
if (nsegs > ia->ri_max_frmr_depth) if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth; nsegs = ia->ri_max_frmr_depth;
...@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
key = (u8)(mr->rkey & 0x000000FF); key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key); ib_update_fast_reg_key(mr, ++key);
reg_wr.wr.next = NULL; reg_wr->wr.next = NULL;
reg_wr.wr.opcode = IB_WR_REG_MR; reg_wr->wr.opcode = IB_WR_REG_MR;
reg_wr.wr.wr_id = (uintptr_t)mw; reg_wr->wr.wr_id = (uintptr_t)mw;
reg_wr.wr.num_sge = 0; reg_wr->wr.num_sge = 0;
reg_wr.wr.send_flags = 0; reg_wr->wr.send_flags = 0;
reg_wr.mr = mr; reg_wr->mr = mr;
reg_wr.key = mr->rkey; reg_wr->key = mr->rkey;
reg_wr.access = writing ? reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep); DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc) if (rc)
goto out_senderr; goto out_senderr;
...@@ -413,6 +431,116 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -413,6 +431,116 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return rc; return rc;
} }
/*
 * Build the LOCAL_INV work request for the MW attached to @seg.
 *
 * Marks the frmr FRMR_IS_INVALID with no waiter, zeroes the frmr's
 * embedded fr_invwr and fills in wr_id (the MW pointer), the opcode
 * and the rkey to invalidate.
 *
 * Returns a pointer to that embedded work request.
 */
static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_send_wr *wr = &frmr->fr_invwr;

	frmr->fr_waiter = false;
	frmr->fr_state = FRMR_IS_INVALID;

	memset(wr, 0, sizeof(*wr));
	wr->wr_id = (unsigned long)(void *)mw;
	wr->opcode = IB_WR_LOCAL_INV;
	wr->ex.invalidate_rkey = frmr->fr_mr->rkey;

	return wr;
}
/*
 * Detach the MW from @seg and DMA unmap its scatterlist.
 *
 * @rc: zero returns the MW via rpcrdma_put_mw(); non-zero hands it
 *      to __frwr_queue_recovery() instead.
 */
static void
__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
		 int rc)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_device *device = r_xprt->rx_ia.ri_device;

	seg->rl_mw = NULL;
	ib_dma_unmap_sg(device, frmr->sg, frmr->sg_nents, seg->mr_dir);

	if (rc) {
		__frwr_queue_recovery(mw);
		return;
	}
	rpcrdma_put_mw(r_xprt, mw);
}
/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * If the LOCAL_INV chain cannot be posted, the signaled WR at the
 * end of the chain is never queued, so fr_linv_done would not be
 * completed through it. In that case the wait is skipped and the
 * non-zero rc makes __frwr_dma_unmap() queue each MW for recovery
 * instead of returning it to the free list.
 *
 * A req with no chunks is a no-op.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_frmr *f;
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	invalidate_wrs = pos = prev = NULL;
	seg = NULL;
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		pos = __frwr_prepare_linv_wr(seg);

		if (!invalidate_wrs)
			invalidate_wrs = pos;
		else
			prev->next = pos;
		prev = pos;

		i += seg->mr_nsegs;
	}

	/* Nothing was registered: there is no last segment to signal
	 * on, and dereferencing "seg" below would be a NULL access.
	 */
	if (!seg)
		return;
	f = &seg->rl_mw->r.frmr;

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
	f->fr_waiter = true;
	init_completion(&f->fr_linv_done);
	INIT_CQCOUNT(&r_xprt->rx_ep);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
	if (rc)
		/* The signaled WR was not posted: do not sleep on a
		 * completion that nothing will deliver.
		 */
		pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
	else
		wait_for_completion(&f->fr_linv_done);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		__frwr_dma_unmap(r_xprt, seg, rc);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
}
/* Post a LOCAL_INV Work Request to prevent further remote access /* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE. * via RDMA READ or RDMA WRITE.
*/ */
...@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) ...@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw; struct rpcrdma_mw *mw = seg1->rl_mw;
struct rpcrdma_frmr *frmr = &mw->r.frmr; struct rpcrdma_frmr *frmr = &mw->r.frmr;
struct ib_send_wr invalidate_wr, *bad_wr; struct ib_send_wr *invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs; int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw); dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL; seg1->rl_mw = NULL;
frmr->fr_state = FRMR_IS_INVALID; frmr->fr_state = FRMR_IS_INVALID;
invalidate_wr = &mw->r.frmr.fr_invwr;
memset(&invalidate_wr, 0, sizeof(invalidate_wr)); memset(invalidate_wr, 0, sizeof(*invalidate_wr));
invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr->wr_id = (uintptr_t)mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV; invalidate_wr->opcode = IB_WR_LOCAL_INV;
invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep); DECR_CQCOUNT(&r_xprt->rx_ep);
ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock); read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock); read_unlock(&ia->ri_qplock);
if (rc) if (rc)
goto out_err; goto out_err;
...@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf) ...@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map, .ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap = frwr_op_unmap, .ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open, .ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages, .ro_maxpages = frwr_op_maxpages,
......
...@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) ...@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
return 1; return 1;
} }
/* DMA unmap every segment that was mapped for "req".
 *
 * Walks req->rl_segments from index 0, unmapping one segment per
 * chunk and counting req->rl_nchunks down to zero as it goes.
 */
static void
physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	unsigned int idx = 0;

	while (req->rl_nchunks) {
		rpcrdma_unmap_one(device, &req->rl_segments[idx]);
		idx++;
		req->rl_nchunks--;
	}
}
static void static void
physical_op_destroy(struct rpcrdma_buffer *buf) physical_op_destroy(struct rpcrdma_buffer *buf)
{ {
...@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf) ...@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map, .ro_map = physical_op_map,
.ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap = physical_op_unmap, .ro_unmap = physical_op_unmap,
.ro_open = physical_op_open, .ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages, .ro_maxpages = physical_op_maxpages,
......
...@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (req->rl_reply) if (req->rl_reply)
goto out_duplicate; goto out_duplicate;
/* Sanity checking has passed. We are now committed
* to complete this transaction.
*/
list_del_init(&rqst->rq_list);
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n", " RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst, __func__, rep, req, rqst,
...@@ -888,12 +893,23 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -888,12 +893,23 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
break; break;
} }
/* Invalidate and flush the data payloads before waking the
* waiting application. This guarantees the memory region is
* properly fenced from the server before the application
* accesses the data. It also ensures proper send flow
* control: waking the next RPC waits until this RPC has
* relinquished all its Send Queue entries.
*/
if (req->rl_nchunks)
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
credits = be32_to_cpu(headerp->rm_credit); credits = be32_to_cpu(headerp->rm_credit);
if (credits == 0) if (credits == 0)
credits = 1; /* don't deadlock */ credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_max_requests) else if (credits > r_xprt->rx_buf.rb_max_requests)
credits = r_xprt->rx_buf.rb_max_requests; credits = r_xprt->rx_buf.rb_max_requests;
spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd; cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT; xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd) if (xprt->cwnd > cwnd)
......
...@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer) ...@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
req = rb->rg_owner; req = rb->rg_owner;
if (req->rl_backchannel)
return;
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
......
...@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* set trigger for requesting send completion */ /* set trigger for requesting send completion */
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) if (ep->rep_cqinit <= 2)
ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; ep->rep_cqinit = 0; /* always signal? */
else if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0;
INIT_CQCOUNT(ep); INIT_CQCOUNT(ep);
init_waitqueue_head(&ep->rep_connect_wait); init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
...@@ -852,12 +850,13 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -852,12 +850,13 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (extras) { if (extras) {
rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
if (rc) if (rc) {
pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
__func__, rc); __func__, rc);
rc = 0; rc = 0;
} }
} }
}
out: out:
if (rc) if (rc)
...@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) ...@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
unsigned long flags;
int rc; int rc;
while (count--) { while (count--) {
spin_lock_irqsave(&buffers->rb_lock, flags); spin_lock(&buffers->rb_lock);
if (list_empty(&buffers->rb_recv_bufs)) if (list_empty(&buffers->rb_recv_bufs))
goto out_reqbuf; goto out_reqbuf;
rep = rpcrdma_buffer_get_rep_locked(buffers); rep = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock_irqrestore(&buffers->rb_lock, flags); spin_unlock(&buffers->rb_lock);
rc = rpcrdma_ep_post_recv(ia, ep, rep); rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc) if (rc)
...@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) ...@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
return 0; return 0;
out_reqbuf: out_reqbuf:
spin_unlock_irqrestore(&buffers->rb_lock, flags); spin_unlock(&buffers->rb_lock);
pr_warn("%s: no extra receive buffers\n", __func__); pr_warn("%s: no extra receive buffers\n", __func__);
return -ENOMEM; return -ENOMEM;
......
...@@ -88,12 +88,6 @@ struct rpcrdma_ep { ...@@ -88,12 +88,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
}; };
/*
* Force a signaled SEND Work Request every so often,
* in case the provider needs to do some housekeeping.
*/
#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
...@@ -207,6 +201,12 @@ struct rpcrdma_frmr { ...@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
enum rpcrdma_frmr_state fr_state; enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work; struct work_struct fr_work;
struct rpcrdma_xprt *fr_xprt; struct rpcrdma_xprt *fr_xprt;
bool fr_waiter;
struct completion fr_linv_done;;
union {
struct ib_reg_wr fr_regwr;
struct ib_send_wr fr_invwr;
};
}; };
struct rpcrdma_fmr { struct rpcrdma_fmr {
...@@ -364,6 +364,8 @@ struct rpcrdma_xprt; ...@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
struct rpcrdma_memreg_ops { struct rpcrdma_memreg_ops {
int (*ro_map)(struct rpcrdma_xprt *, int (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool); struct rpcrdma_mr_seg *, int, bool);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct rpcrdma_req *);
int (*ro_unmap)(struct rpcrdma_xprt *, int (*ro_unmap)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *); struct rpcrdma_mr_seg *);
int (*ro_open)(struct rpcrdma_ia *, int (*ro_open)(struct rpcrdma_ia *,
......
...@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, ...@@ -398,7 +398,6 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock)) if (unlikely(!sock))
return -ENOTSOCK; return -ENOTSOCK;
clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
if (base != 0) { if (base != 0) {
addr = NULL; addr = NULL;
addrlen = 0; addrlen = 0;
...@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task) ...@@ -442,7 +441,6 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--; transport->inet->sk_write_pending--;
clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
} }
/** /**
...@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task) ...@@ -467,20 +465,11 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */ /* Don't race with disconnect */
if (xprt_connected(xprt)) { if (xprt_connected(xprt)) {
if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { /* wait for more buffer space */
/*
* Notify TCP that we're limited by the application
* window size
*/
set_bit(SOCK_NOSPACE, &transport->sock->flags);
sk->sk_write_pending++; sk->sk_write_pending++;
/* ...and wait for more buffer space */
xprt_wait_for_buffer_space(task, xs_nospace_callback); xprt_wait_for_buffer_space(task, xs_nospace_callback);
} } else
} else {
clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
ret = -ENOTCONN; ret = -ENOTCONN;
}
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
...@@ -616,9 +605,6 @@ static int xs_udp_send_request(struct rpc_task *task) ...@@ -616,9 +605,6 @@ static int xs_udp_send_request(struct rpc_task *task)
case -EAGAIN: case -EAGAIN:
status = xs_nospace(task); status = xs_nospace(task);
break; break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ENETUNREACH: case -ENETUNREACH:
case -ENOBUFS: case -ENOBUFS:
case -EPIPE: case -EPIPE:
...@@ -626,7 +612,10 @@ static int xs_udp_send_request(struct rpc_task *task) ...@@ -626,7 +612,10 @@ static int xs_udp_send_request(struct rpc_task *task)
case -EPERM: case -EPERM:
/* When the server has died, an ICMP port unreachable message /* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */ * prompts ECONNREFUSED. */
clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
} }
return status; return status;
...@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task) ...@@ -706,16 +695,16 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN: case -EAGAIN:
status = xs_nospace(task); status = xs_nospace(task);
break; break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ECONNRESET: case -ECONNRESET:
case -ECONNREFUSED: case -ECONNREFUSED:
case -ENOTCONN: case -ENOTCONN:
case -EADDRINUSE: case -EADDRINUSE:
case -ENOBUFS: case -ENOBUFS:
case -EPIPE: case -EPIPE:
clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
} }
return status; return status;
...@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk) ...@@ -1609,19 +1598,23 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk) static void xs_write_space(struct sock *sk)
{ {
struct socket *sock; struct socket_wq *wq;
struct rpc_xprt *xprt; struct rpc_xprt *xprt;
if (unlikely(!(sock = sk->sk_socket))) if (!sk->sk_socket)
return; return;
clear_bit(SOCK_NOSPACE, &sock->flags); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
if (unlikely(!(xprt = xprt_from_sock(sk)))) if (unlikely(!(xprt = xprt_from_sock(sk))))
return; return;
if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) rcu_read_lock();
return; wq = rcu_dereference(sk->sk_wq);
if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
goto out;
xprt_write_space(xprt); xprt_write_space(xprt);
out:
rcu_read_unlock();
} }
/** /**
...@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) ...@@ -1907,18 +1900,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
} }
} }
#else #else
static inline void xs_reclassify_socketu(struct socket *sock)
{
}
static inline void xs_reclassify_socket4(struct socket *sock)
{
}
static inline void xs_reclassify_socket6(struct socket *sock)
{
}
static inline void xs_reclassify_socket(int family, struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock)
{ {
} }
...@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport) ...@@ -2008,7 +1989,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
"transport socket (%d).\n", -status); "transport socket (%d).\n", -status);
goto out; goto out;
} }
xs_reclassify_socketu(sock); xs_reclassify_socket(AF_LOCAL, sock);
dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment