Commit d801327d authored by Jeff Layton, committed by Ilya Dryomov

ceph: convert ceph_write_begin to netfs_write_begin

Convert ceph_write_begin to use the netfs_write_begin helper. Most of
the ops we need for it are already in place from the readpage conversion
but we do add a new check_write_begin op since ceph needs to be able to
vet whether there is an incompatible writeback already in flight before
reading in the page.

With this, we can also remove the old ceph_do_readpage helper.
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent f0702876
...@@ -62,6 +62,9 @@ ...@@ -62,6 +62,9 @@
(CONGESTION_ON_THRESH(congestion_kb) - \ (CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2)) (CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
struct page *page, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page) static inline struct ceph_snap_context *page_snap_context(struct page *page)
{ {
if (PagePrivate(page)) if (PagePrivate(page))
...@@ -309,6 +312,7 @@ const struct netfs_read_request_ops ceph_netfs_read_ops = { ...@@ -309,6 +312,7 @@ const struct netfs_read_request_ops ceph_netfs_read_ops = {
.issue_op = ceph_netfs_issue_op, .issue_op = ceph_netfs_issue_op,
.expand_readahead = ceph_netfs_expand_readahead, .expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length, .clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
}; };
/* read a single page, without unlocking it. */ /* read a single page, without unlocking it. */
...@@ -341,76 +345,6 @@ static int ceph_readpage(struct file *file, struct page *page) ...@@ -341,76 +345,6 @@ static int ceph_readpage(struct file *file, struct page *page)
return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL); return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
} }
/*
 * Fill one page cache page from the OSDs, leaving the page lock alone.
 *
 * Pages that start at or beyond i_size, and non-first pages of an inode
 * that still carries inline data, are zero-filled and marked uptodate
 * without issuing any request.  Otherwise a single-page OSD read is
 * started and waited on; a short read (including -ENOENT, which is
 * treated as a zero-length read) has the remainder of the page zeroed.
 *
 * Returns 0 on success or a negative errno.  On -EBLOCKLISTED the
 * client is additionally flagged as blocklisted.
 */
static int ceph_do_readpage(struct file *filp, struct page *page)
{
	struct inode *inode = file_inode(filp);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	struct ceph_osd_request *rd_req;
	struct ceph_vino vino = ceph_vino(inode);
	int ret = 0;
	u64 start = page_offset(page);
	u64 nbytes = PAGE_SIZE;

	/* Nothing to fetch past EOF: hand back a page of zeroes. */
	if (start >= i_size_read(inode)) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		/*
		 * Page 0 of an inline inode is expected to have been
		 * populated in the page cache when Fcr caps were
		 * obtained, so being asked to read it here is an error.
		 */
		if (start == 0)
			return -EINVAL;

		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
	     vino.ino, vino.snap, filp, start, nbytes, page, page->index);

	rd_req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, start,
				       &nbytes, 0, 1, CEPH_OSD_OP_READ,
				       CEPH_OSD_FLAG_READ, NULL,
				       ci->i_truncate_seq, ci->i_truncate_size,
				       false);
	if (IS_ERR(rd_req))
		return PTR_ERR(rd_req);

	osd_req_op_extent_osd_data_pages(rd_req, 0, &page, nbytes, 0,
					 false, false);

	ret = ceph_osdc_start_request(osdc, rd_req, false);
	if (!ret)
		ret = ceph_osdc_wait_request(osdc, rd_req);

	/* Latency is recorded whether or not the read succeeded. */
	ceph_update_read_latency(&fsc->mdsc->metric, rd_req->r_start_latency,
				 rd_req->r_end_latency, ret);
	ceph_osdc_put_request(rd_req);
	dout("readpage result %d\n", ret);

	/* A missing object reads back as all zeroes. */
	if (ret == -ENOENT)
		ret = 0;

	if (ret < 0) {
		if (ret == -EBLOCKLISTED)
			fsc->blocklisted = true;
		return ret;
	}

	if (ret >= PAGE_SIZE)
		flush_dcache_page(page);
	else
		/* zero fill remainder of page */
		zero_user_segment(page, ret, PAGE_SIZE);

	SetPageUptodate(page);
	return 0;
}
/* /*
* Finish an async read(ahead) op. * Finish an async read(ahead) op.
*/ */
...@@ -1430,6 +1364,31 @@ ceph_find_incompatible(struct page *page) ...@@ -1430,6 +1364,31 @@ ceph_find_incompatible(struct page *page)
return NULL; return NULL;
} }
/*
 * check_write_begin op for ceph_netfs_read_ops: vet a freshly grabbed
 * page for a conflicting snap context before it is modified.
 *
 * Returns 0 when ceph_find_incompatible() reports no conflict; the page
 * is left untouched in that case.  In every other case the page is
 * unlocked and its reference dropped before returning:
 *   - the error encoded in the pointer, if ceph_find_incompatible()
 *     returned an ERR_PTR;
 *   - -EAGAIN after writeback has been queued and the wait for the
 *     conflicting context completed;
 *   - the non-zero result of wait_event_killable() otherwise.
 *
 * NOTE(review): the page is released on all non-zero returns, not only
 * -EAGAIN.  Confirm the caller does not unlock/put it again when this
 * returns an error other than -EAGAIN.
 *
 * @pos, @len and @_fsdata are unused here.
 */
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
					struct page *page, void **_fsdata)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_snap_context *conflict;
	int ret;

	conflict = ceph_find_incompatible(page);
	if (!conflict)
		return 0;

	/* Let go of the page before reporting an error or sleeping. */
	unlock_page(page);
	put_page(page);

	if (IS_ERR(conflict))
		return PTR_ERR(conflict);

	ceph_queue_writeback(inode);
	ret = wait_event_killable(ci->i_cap_wq,
				  context_is_writeable_or_written(inode, conflict));
	ceph_put_snap_context(conflict);

	if (ret != 0)
		return ret;

	/* Wait finished cleanly: ask the caller to grab the page again. */
	return -EAGAIN;
}
/* /*
* We are only allowed to write into/dirty the page if the page is * We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context. * clean, or already dirty within the same snap context.
...@@ -1440,75 +1399,47 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, ...@@ -1440,75 +1399,47 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
struct page *page = NULL; struct page *page = NULL;
pgoff_t index = pos >> PAGE_SHIFT; pgoff_t index = pos >> PAGE_SHIFT;
int pos_in_page = pos & ~PAGE_MASK; int r;
int r = 0;
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) { /*
* Uninlining should have already been done and everything updated, EXCEPT
* for inline_version sent to the MDS.
*/
if (ci->i_inline_version != CEPH_INLINE_NONE) {
page = grab_cache_page_write_begin(mapping, index, flags); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page)
r = -ENOMEM; return -ENOMEM;
break;
}
snapc = ceph_find_incompatible(page);
if (snapc) {
if (IS_ERR(snapc)) {
r = PTR_ERR(snapc);
break;
}
unlock_page(page);
put_page(page);
page = NULL;
ceph_queue_writeback(inode);
r = wait_event_killable(ci->i_cap_wq,
context_is_writeable_or_written(inode, snapc));
ceph_put_snap_context(snapc);
if (r != 0)
break;
continue;
}
if (PageUptodate(page)) {
dout(" page %p already uptodate\n", page);
break;
}
/* /*
* In some cases we don't need to read at all: * The inline_version on a new inode is set to 1. If that's the
* - full page write * case, then the page is brand new and isn't yet Uptodate.
* - write that lies completely beyond EOF
* - write that covers the the page from start to EOF or beyond it
*/ */
if ((pos_in_page == 0 && len == PAGE_SIZE) || r = 0;
(pos >= i_size_read(inode)) || if (index == 0 && ci->i_inline_version != 1) {
(pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { if (!PageUptodate(page)) {
zero_user_segments(page, 0, pos_in_page, WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
pos_in_page + len, PAGE_SIZE); ci->i_inline_version);
break; r = -EINVAL;
}
goto out;
} }
zero_user_segment(page, 0, PAGE_SIZE);
/* SetPageUptodate(page);
* We need to read it. If we get back -EINPROGRESS, then the page was goto out;
* handed off to fscache and it will be unlocked when the read completes.
* Refind the page in that case so we can reacquire the page lock. Otherwise
* we got a hard error or the read was completed synchronously.
*/
r = ceph_do_readpage(file, page);
if (r != -EINPROGRESS)
break;
} }
r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
&ceph_netfs_read_ops, NULL);
out:
if (r == 0)
wait_on_page_fscache(page);
if (r < 0) { if (r < 0) {
if (page) { if (page)
unlock_page(page);
put_page(page); put_page(page);
}
} else { } else {
WARN_ON_ONCE(!PageLocked(page));
*pagep = page; *pagep = page;
} }
return r; return r;
...@@ -1681,9 +1612,6 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) ...@@ -1681,9 +1612,6 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
return ret; return ret;
} }
/*
* Reuse write_begin here for simplicity.
*/
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = vmf->vma; struct vm_area_struct *vma = vmf->vma;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment