Commit 1ac0fc8a authored by Yan, Zheng; committed by Sage Weil

ceph: fix race between writepages and truncate

ceph_writepages_start() reads inode->i_size in two places. It can get
different values between successive reads, because truncate can change
inode->i_size at any time. The race can lead to a mismatch between the
data length of the osd request and the pages marked as writeback. When
the osd request finishes, it clears writeback pages according to its
data length, so some pages can be left in the writeback state forever.
The fix is to read inode->i_size only once, save its value in a local
variable, and use that local variable wherever i_size is needed.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
parent 03d254ed
...@@ -671,7 +671,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -671,7 +671,7 @@ static int ceph_writepages_start(struct address_space *mapping,
unsigned wsize = 1 << inode->i_blkbits; unsigned wsize = 1 << inode->i_blkbits;
struct ceph_osd_request *req = NULL; struct ceph_osd_request *req = NULL;
int do_sync; int do_sync;
u64 snap_size = 0; u64 snap_size;
/* /*
* Include a 'sync' in the OSD request if this is a data * Include a 'sync' in the OSD request if this is a data
...@@ -717,6 +717,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -717,6 +717,7 @@ static int ceph_writepages_start(struct address_space *mapping,
retry: retry:
/* find oldest snap context with dirty data */ /* find oldest snap context with dirty data */
ceph_put_snap_context(snapc); ceph_put_snap_context(snapc);
snap_size = 0;
snapc = get_oldest_context(inode, &snap_size); snapc = get_oldest_context(inode, &snap_size);
if (!snapc) { if (!snapc) {
/* hmm, why does writepages get called when there /* hmm, why does writepages get called when there
...@@ -724,6 +725,8 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -724,6 +725,8 @@ static int ceph_writepages_start(struct address_space *mapping,
dout(" no snap context with dirty data?\n"); dout(" no snap context with dirty data?\n");
goto out; goto out;
} }
if (snap_size == 0)
snap_size = i_size_read(inode);
dout(" oldest snapc is %p seq %lld (%d snaps)\n", dout(" oldest snapc is %p seq %lld (%d snaps)\n",
snapc, snapc->seq, snapc->num_snaps); snapc, snapc->seq, snapc->num_snaps);
if (last_snapc && snapc != last_snapc) { if (last_snapc && snapc != last_snapc) {
...@@ -795,11 +798,8 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -795,11 +798,8 @@ static int ceph_writepages_start(struct address_space *mapping,
dout("waiting on writeback %p\n", page); dout("waiting on writeback %p\n", page);
wait_on_page_writeback(page); wait_on_page_writeback(page);
} }
if ((snap_size && page_offset(page) > snap_size) || if (page_offset(page) >= snap_size) {
(!snap_size && dout("%p page eof %llu\n", page, snap_size);
page_offset(page) > i_size_read(inode))) {
dout("%p page eof %llu\n", page, snap_size ?
snap_size : i_size_read(inode));
done = 1; done = 1;
unlock_page(page); unlock_page(page);
break; break;
...@@ -911,7 +911,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -911,7 +911,7 @@ static int ceph_writepages_start(struct address_space *mapping,
/* Format the osd request message and submit the write */ /* Format the osd request message and submit the write */
offset = page_offset(pages[0]); offset = page_offset(pages[0]);
len = min((snap_size ? snap_size : i_size_read(inode)) - offset, len = min(snap_size - offset,
(u64)locked_pages << PAGE_CACHE_SHIFT); (u64)locked_pages << PAGE_CACHE_SHIFT);
dout("writepages got %d pages at %llu~%llu\n", dout("writepages got %d pages at %llu~%llu\n",
locked_pages, offset, len); locked_pages, offset, len);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment