Commit ce3510c3 authored by Theodore Ts'o's avatar Theodore Ts'o Committed by Willy Tarreau

ext4: fix race between truncate and __ext4_journalled_writepage()

commit bdf96838 upstream.

The commit cf108bca: "ext4: Invert the locking order of page_lock
and transaction start" caused __ext4_journalled_writepage() to drop
the page lock before the page was written back, as part of changing
the locking order to jbd2_journal_start -> page_lock.  However, this
introduced a potential race if there was a truncate racing with the
data=journalled writeback mode.

Fix this by grabbing the page lock after starting the journal handle,
and then checking to see if page had gotten truncated out from under
us.

This fixes a number of different warnings or BUG_ON's when running
xfstests generic/086 in data=journalled mode, including:

jbd2_journal_dirty_metadata: vdc-8: bad jh for block 115643: transaction (ee3fe7
c0, 164), jh->b_transaction (  (null), 0), jh->b_next_transaction (  (null), 0), jlist 0

	      	      	  - and -

kernel BUG at /usr/projects/linux/ext4/fs/jbd2/transaction.c:2200!
    ...
Call Trace:
 [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
 [<c02b2de5>] __ext4_journalled_invalidatepage+0x10f/0x117
 [<c02b2ded>] ? __ext4_journalled_invalidatepage+0x117/0x117
 [<c027d883>] ? lock_buffer+0x36/0x36
 [<c02b2dfa>] ext4_journalled_invalidatepage+0xd/0x22
 [<c0229139>] do_invalidatepage+0x22/0x26
 [<c0229198>] truncate_inode_page+0x5b/0x85
 [<c022934b>] truncate_inode_pages_range+0x156/0x38c
 [<c0229592>] truncate_inode_pages+0x11/0x15
 [<c022962d>] truncate_pagecache+0x55/0x71
 [<c02b913b>] ext4_setattr+0x4a9/0x560
 [<c01ca542>] ? current_kernel_time+0x10/0x44
 [<c026c4d8>] notify_change+0x1c7/0x2be
 [<c0256a00>] do_truncate+0x65/0x85
 [<c0226f31>] ? file_ra_state_init+0x12/0x29

	      	      	  - and -

WARNING: CPU: 1 PID: 1331 at /usr/projects/linux/ext4/fs/jbd2/transaction.c:1396
irty_metadata+0x14a/0x1ae()
    ...
Call Trace:
 [<c01b879f>] ? console_unlock+0x3a1/0x3ce
 [<c082cbb4>] dump_stack+0x48/0x60
 [<c0178b65>] warn_slowpath_common+0x89/0xa0
 [<c02ef2cf>] ? jbd2_journal_dirty_metadata+0x14a/0x1ae
 [<c0178bef>] warn_slowpath_null+0x14/0x18
 [<c02ef2cf>] jbd2_journal_dirty_metadata+0x14a/0x1ae
 [<c02d8615>] __ext4_handle_dirty_metadata+0xd4/0x19d
 [<c02b2f44>] write_end_fn+0x40/0x53
 [<c02b4a16>] ext4_walk_page_buffers+0x4e/0x6a
 [<c02b59e7>] ext4_writepage+0x354/0x3b8
 [<c02b2f04>] ? mpage_release_unused_pages+0xd4/0xd4
 [<c02b1b21>] ? wait_on_buffer+0x2c/0x2c
 [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
 [<c02b5a5b>] __writepage+0x10/0x2e
 [<c0225956>] write_cache_pages+0x22d/0x32c
 [<c02b5a4b>] ? ext4_writepage+0x3b8/0x3b8
 [<c02b6ee8>] ext4_writepages+0x102/0x607
 [<c019adfe>] ? sched_clock_local+0x10/0x10e
 [<c01a8a7c>] ? __lock_is_held+0x2e/0x44
 [<c01a8ad5>] ? lock_is_held+0x43/0x51
 [<c0226dff>] do_writepages+0x1c/0x29
 [<c0276bed>] __writeback_single_inode+0xc3/0x545
 [<c0277c07>] writeback_sb_inodes+0x21f/0x36d
    ...
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
[bwh: Backported to 3.2: adjust context]
Signed-off-by: default avatarBen Hutchings <ben@decadent.org.uk>
(cherry picked from commit b77ea3c2)
[wt: adjusted context since we're missing 441c8508]
Signed-off-by: default avatarWilly Tarreau <w@1wt.eu>
parent b437e292
...@@ -2670,13 +2670,27 @@ static int __ext4_journalled_writepage(struct page *page, ...@@ -2670,13 +2670,27 @@ static int __ext4_journalled_writepage(struct page *page,
page_bufs = page_buffers(page); page_bufs = page_buffers(page);
BUG_ON(!page_bufs); BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
/* As soon as we unlock the page, it can go away, but we have /*
* references to buffers so we are safe */ * We need to release the page lock before we start the
* journal, so grab a reference so the page won't disappear
* out from under us.
*/
get_page(page);
unlock_page(page); unlock_page(page);
handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
put_page(page);
goto out_no_pagelock;
}
lock_page(page);
put_page(page);
if (page->mapping != mapping) {
/* The page got truncated from under us */
ext4_journal_stop(handle);
ret = 0;
goto out; goto out;
} }
...@@ -2694,6 +2708,8 @@ static int __ext4_journalled_writepage(struct page *page, ...@@ -2694,6 +2708,8 @@ static int __ext4_journalled_writepage(struct page *page,
walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA); ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out: out:
unlock_page(page);
out_no_pagelock:
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment