Commit 53e87268 authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o

ext4: fix deadlock in journal_unmap_buffer()

We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start.  We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY.  Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 4520fb3c
...@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) ...@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
block_invalidatepage(page, offset); block_invalidatepage(page, offset);
} }
static void ext4_journalled_invalidatepage(struct page *page, static int __ext4_journalled_invalidatepage(struct page *page,
unsigned long offset) unsigned long offset)
{ {
journal_t *journal = EXT4_JOURNAL(page->mapping->host); journal_t *journal = EXT4_JOURNAL(page->mapping->host);
...@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page, ...@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
if (offset == 0) if (offset == 0)
ClearPageChecked(page); ClearPageChecked(page);
jbd2_journal_invalidatepage(journal, page, offset); return jbd2_journal_invalidatepage(journal, page, offset);
}
/* Wrapper for aops... */
static void ext4_journalled_invalidatepage(struct page *page,
unsigned long offset)
{
WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
} }
static int ext4_releasepage(struct page *page, gfp_t wait) static int ext4_releasepage(struct page *page, gfp_t wait)
...@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
return err; return err;
} }
/*
* In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
* buffers that are attached to a page stradding i_size and are undergoing
* commit. In that case we have to wait for commit to finish and try again.
*/
static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
struct page *page;
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = 0;
int ret;
offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
/*
* All buffers in the last page remain valid? Then there's nothing to
* do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
* blocksize case
*/
if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
return;
while (1) {
page = find_lock_page(inode->i_mapping,
inode->i_size >> PAGE_CACHE_SHIFT);
if (!page)
return;
ret = __ext4_journalled_invalidatepage(page, offset);
unlock_page(page);
page_cache_release(page);
if (ret != -EBUSY)
return;
commit_tid = 0;
read_lock(&journal->j_state_lock);
if (journal->j_committing_transaction)
commit_tid = journal->j_committing_transaction->t_tid;
read_unlock(&journal->j_state_lock);
if (commit_tid)
jbd2_log_wait_commit(journal, commit_tid);
}
}
/* /*
* ext4_setattr() * ext4_setattr()
* *
...@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} }
if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(inode)) { if (attr->ia_size != inode->i_size) {
truncate_setsize(inode, attr->ia_size); loff_t oldsize = inode->i_size;
/* Inode size will be reduced, wait for dio in flight.
* Temporarily disable dioread_nolock to prevent i_size_write(inode, attr->ia_size);
* livelock. */ /*
* Blocks are going to be removed from the inode. Wait
* for dio in flight. Temporarily disable
* dioread_nolock to prevent livelock.
*/
if (orphan) { if (orphan) {
ext4_inode_block_unlocked_dio(inode); if (!ext4_should_journal_data(inode)) {
inode_dio_wait(inode); ext4_inode_block_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode); inode_dio_wait(inode);
ext4_inode_resume_unlocked_dio(inode);
} else
ext4_wait_for_tail_page_commit(inode);
} }
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, oldsize, inode->i_size);
} }
ext4_truncate(inode); ext4_truncate(inode);
} }
......
...@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, ...@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
BUFFER_TRACE(bh, "entry"); BUFFER_TRACE(bh, "entry");
retry:
/* /*
* It is safe to proceed here without the j_list_lock because the * It is safe to proceed here without the j_list_lock because the
* buffers cannot be stolen by try_to_free_buffers as long as we are * buffers cannot be stolen by try_to_free_buffers as long as we are
...@@ -1935,14 +1934,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, ...@@ -1935,14 +1934,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* for commit and try again. * for commit and try again.
*/ */
if (partial_page) { if (partial_page) {
tid_t tid = journal->j_committing_transaction->t_tid;
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); return -EBUSY;
goto retry;
} }
/* /*
* OK, buffer won't be reachable after truncate. We just set * OK, buffer won't be reachable after truncate. We just set
...@@ -2003,21 +1999,23 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, ...@@ -2003,21 +1999,23 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* @page: page to flush * @page: page to flush
* @offset: length of page to invalidate. * @offset: length of page to invalidate.
* *
* Reap page buffers containing data after offset in page. * Reap page buffers containing data after offset in page. Can return -EBUSY
* * if buffers are part of the committing transaction and the page is straddling
* i_size. Caller then has to wait for current commit and try again.
*/ */
void jbd2_journal_invalidatepage(journal_t *journal, int jbd2_journal_invalidatepage(journal_t *journal,
struct page *page, struct page *page,
unsigned long offset) unsigned long offset)
{ {
struct buffer_head *head, *bh, *next; struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0; unsigned int curr_off = 0;
int may_free = 1; int may_free = 1;
int ret = 0;
if (!PageLocked(page)) if (!PageLocked(page))
BUG(); BUG();
if (!page_has_buffers(page)) if (!page_has_buffers(page))
return; return 0;
/* We will potentially be playing with lists other than just the /* We will potentially be playing with lists other than just the
* data lists (especially for journaled data mode), so be * data lists (especially for journaled data mode), so be
...@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal, ...@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (offset <= curr_off) { if (offset <= curr_off) {
/* This block is wholly outside the truncation point */ /* This block is wholly outside the truncation point */
lock_buffer(bh); lock_buffer(bh);
may_free &= journal_unmap_buffer(journal, bh, ret = journal_unmap_buffer(journal, bh, offset > 0);
offset > 0);
unlock_buffer(bh); unlock_buffer(bh);
if (ret < 0)
return ret;
may_free &= ret;
} }
curr_off = next_off; curr_off = next_off;
bh = next; bh = next;
...@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal, ...@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (may_free && try_to_free_buffers(page)) if (may_free && try_to_free_buffers(page))
J_ASSERT(!page_has_buffers(page)); J_ASSERT(!page_has_buffers(page));
} }
return 0;
} }
/* /*
......
...@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, ...@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
extern void journal_sync_buffer (struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *);
extern void jbd2_journal_invalidatepage(journal_t *, extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned long); struct page *, unsigned long);
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
extern int jbd2_journal_stop(handle_t *); extern int jbd2_journal_stop(handle_t *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment