Commit 914ee295 authored by Xin Zhong's avatar Xin Zhong Committed by Chris Mason

Btrfs: pwrite blocked when writing from the mmaped buffer of the same page

This problem is found in meego testing:
http://bugs.meego.com/show_bug.cgi?id=6672
A file in btrfs is mmaped and the mmaped buffer is passed to pwrite to write to the same page
of the same file. In btrfs_file_aio_write(), the pages are locked by prepare_pages(). So when
btrfs_copy_from_user() is called, page fault happens and the same page needs to be locked again
in filemap_fault(). The fix is to move iov_iter_fault_in_readable() before prepare_pages() to make page
fault happen before pages are locked. And also disable page fault in critical region in
btrfs_copy_from_user().

Reviewed-by: Yan, Zheng<zheng.z.yan@intel.com>
Signed-off-by: Zhong, Xin <xin.zhong@intel.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent f106e82c
...@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, ...@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
struct page **prepared_pages, struct page **prepared_pages,
struct iov_iter *i) struct iov_iter *i)
{ {
size_t copied; size_t copied = 0;
int pg = 0; int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1); int offset = pos & (PAGE_CACHE_SIZE - 1);
int total_copied = 0;
while (write_bytes > 0) { while (write_bytes > 0) {
size_t count = min_t(size_t, size_t count = min_t(size_t,
PAGE_CACHE_SIZE - offset, write_bytes); PAGE_CACHE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg]; struct page *page = prepared_pages[pg];
again: /*
if (unlikely(iov_iter_fault_in_readable(i, count))) * Copy data from userspace to the current page
return -EFAULT; *
* Disable pagefault to avoid recursive lock since
/* Copy data from userspace to the current page */ * the pages are already locked
copied = iov_iter_copy_from_user(page, i, offset, count); */
pagefault_disable();
copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
pagefault_enable();
/* Flush processor's dcache for this page */ /* Flush processor's dcache for this page */
flush_dcache_page(page); flush_dcache_page(page);
iov_iter_advance(i, copied); iov_iter_advance(i, copied);
write_bytes -= copied; write_bytes -= copied;
total_copied += copied;
/* Return to btrfs_file_aio_write to fault page */
if (unlikely(copied == 0)) { if (unlikely(copied == 0)) {
count = min_t(size_t, PAGE_CACHE_SIZE - offset, break;
iov_iter_single_seg_count(i));
goto again;
} }
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
...@@ -81,7 +85,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, ...@@ -81,7 +85,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
offset = 0; offset = 0;
} }
} }
return 0; return total_copied;
} }
/* /*
...@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
unsigned long last_index; unsigned long last_index;
int will_write; int will_write;
int buffered = 0; int buffered = 0;
int copied = 0;
int dirty_pages = 0;
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT)); (file->f_flags & O_DIRECT));
...@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
WARN_ON(num_pages > nrptrs); WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs);
ret = btrfs_delalloc_reserve_space(inode, write_bytes); /*
* Fault pages before locking them in prepare_pages
* to avoid recursive lock
*/
if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
ret = -EFAULT;
goto out;
}
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret) if (ret)
goto out; goto out;
...@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, ...@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
pos, first_index, last_index, pos, first_index, last_index,
write_bytes); write_bytes);
if (ret) { if (ret) {
btrfs_delalloc_release_space(inode, write_bytes); btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
goto out; goto out;
} }
ret = btrfs_copy_from_user(pos, num_pages, copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i); write_bytes, pages, &i);
if (ret == 0) { dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
atomic_inc(
&BTRFS_I(inode)->outstanding_extents);
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
}
if (copied > 0) {
dirty_and_release_pages(NULL, root, file, pages, dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes); dirty_pages, pos, copied);
} }
btrfs_drop_pages(pages, num_pages); btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
if (will_write) { if (copied > 0) {
filemap_fdatawrite_range(inode->i_mapping, pos, if (will_write) {
pos + write_bytes - 1); filemap_fdatawrite_range(inode->i_mapping, pos,
} else { pos + copied - 1);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, } else {
num_pages); balance_dirty_pages_ratelimited_nr(
if (num_pages < inode->i_mapping,
(root->leafsize >> PAGE_CACHE_SHIFT) + 1) dirty_pages);
btrfs_btree_balance_dirty(root, 1); if (dirty_pages <
btrfs_throttle(root); (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
}
} }
pos += write_bytes; pos += copied;
num_written += write_bytes; num_written += copied;
cond_resched(); cond_resched();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment