Commit c3cd6283 authored by Sage Weil

ceph: fix short sync reads from the OSD

If we get a short read from the OSD because the object is small, we need to
zero the remainder of the buffer.  For O_DIRECT reads, the attempted range
is not trimmed to i_size by the VFS, so we were actually looping
indefinitely.

Fix by trimming by i_size, and then unconditionally zeroing the trailing
range.
Reported-by: Jeff Wu <cpwu@tnsoft.com.cn>
Signed-off-by: Sage Weil <sage@newdream.net>
parent 25845472
...@@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file) ...@@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode, static int striped_read(struct inode *inode,
u64 off, u64 len, u64 off, u64 len,
struct page **pages, int num_pages, struct page **pages, int num_pages,
int *checkeof, bool align_to_pages, int *checkeof, bool o_direct,
unsigned long buf_align) unsigned long buf_align)
{ {
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
...@@ -308,7 +308,7 @@ static int striped_read(struct inode *inode, ...@@ -308,7 +308,7 @@ static int striped_read(struct inode *inode,
io_align = off & ~PAGE_MASK; io_align = off & ~PAGE_MASK;
more: more:
if (align_to_pages) if (o_direct)
page_align = (pos - io_align + buf_align) & ~PAGE_MASK; page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
else else
page_align = pos & ~PAGE_MASK; page_align = pos & ~PAGE_MASK;
...@@ -346,20 +346,22 @@ static int striped_read(struct inode *inode, ...@@ -346,20 +346,22 @@ static int striped_read(struct inode *inode,
} }
if (was_short) { if (was_short) {
/* was original extent fully inside i_size? */ /* did we bounce off eof? */
if (pos + left <= inode->i_size) { if (pos + left > inode->i_size)
dout("zero tail\n"); *checkeof = 1;
ceph_zero_page_vector_range(page_off + read, len - read,
/* zero trailing bytes (inside i_size) */
if (left > 0 && pos < inode->i_size) {
if (pos + left > inode->i_size)
left = inode->i_size - pos;
dout("zero tail %d\n", left);
ceph_zero_page_vector_range(page_off + read, left,
pages); pages);
read = len; read += left;
goto out;
} }
/* check i_size */
*checkeof = 1;
} }
out:
if (ret >= 0) if (ret >= 0)
ret = read; ret = read;
dout("striped_read returns %d\n", ret); dout("striped_read returns %d\n", ret);
...@@ -659,7 +661,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, ...@@ -659,7 +661,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
/* hit EOF or hole? */ /* hit EOF or hole? */
if (statret == 0 && *ppos < inode->i_size) { if (statret == 0 && *ppos < inode->i_size) {
dout("aio_read sync_read hit hole, reading more\n"); dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
read += ret; read += ret;
base += ret; base += ret;
len -= ret; len -= ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment