Commit 11c65dcc authored by Josef Bacik, committed by Chris Mason

Btrfs: do aio_write instead of write

In order for AIO to work, we need to implement aio_write.  This patch converts
our btrfs_file_write to btrfs_aio_write.  I've tested this with xfstests and
nothing broke, and the AIO stuff magically started working.  Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 4b46fce2
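Why implementing .aio_write matters: the kernel's io_submit(2) path only dispatches IOCB_CMD_PWRITE/PWRITEV requests through a file's .aio_write method, so with only .write defined the generic AIO code rejected queued writes to btrfs files with -EINVAL. The sketch below is not part of this commit; it is a minimal userspace check of the new path, assuming libaio is installed (compile with -laio) and a btrfs filesystem mounted at the hypothetical path /mnt/btrfs.

/* aio_write_test.c - minimal sketch, not from this commit */
#define _GNU_SOURCE
#include <fcntl.h>
#include <libaio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        io_context_t ctx = 0;
        struct iocb cb, *cbs[1] = { &cb };
        struct io_event ev;
        void *buf;
        int fd;

        /* O_DIRECT wants block-aligned buffer, length, and offset */
        if (posix_memalign(&buf, 4096, 4096))
                return 1;
        memset(buf, 'a', 4096);

        fd = open("/mnt/btrfs/aio-test", O_WRONLY | O_CREAT | O_DIRECT, 0644);
        if (fd < 0) { perror("open"); return 1; }

        if (io_setup(1, &ctx)) { perror("io_setup"); return 1; }

        /* queue one 4K write at offset 0; it is serviced through
         * file->f_op->aio_write, i.e. btrfs_file_aio_write after this patch */
        io_prep_pwrite(&cb, fd, buf, 4096, 0);
        if (io_submit(ctx, 1, cbs) != 1) { perror("io_submit"); return 1; }

        /* reap the completion */
        if (io_getevents(ctx, 1, 1, &ev, NULL) == 1)
                printf("write completed, res = %ld\n", (long)ev.res);

        io_destroy(ctx);
        close(fd);
        free(buf);
        return 0;
}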
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2017,6 +2017,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	sector_t sector;
 	struct extent_map *em;
 	struct block_device *bdev;
+	struct btrfs_ordered_extent *ordered;
 	int ret;
 	int nr = 0;
 	size_t page_offset = 0;
@@ -2028,7 +2029,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 	set_page_extent_mapped(page);
 
 	end = page_end;
-	lock_extent(tree, start, end, GFP_NOFS);
+	while (1) {
+		lock_extent(tree, start, end, GFP_NOFS);
+		ordered = btrfs_lookup_ordered_extent(inode, start);
+		if (!ordered)
+			break;
+		unlock_extent(tree, start, end, GFP_NOFS);
+		btrfs_start_ordered_extent(inode, ordered, 1);
+		btrfs_put_ordered_extent(ordered);
+	}
 
 	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
 		char *userpage;
...
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -46,32 +46,42 @@
 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 					 int write_bytes,
 					 struct page **prepared_pages,
-					 const char __user *buf)
+					 struct iov_iter *i)
 {
-	long page_fault = 0;
-	int i;
+	size_t copied;
+	int pg = 0;
 	int offset = pos & (PAGE_CACHE_SIZE - 1);
 
-	for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
+	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
 				     PAGE_CACHE_SIZE - offset, write_bytes);
-		struct page *page = prepared_pages[i];
-		fault_in_pages_readable(buf, count);
+		struct page *page = prepared_pages[pg];
+again:
+		if (unlikely(iov_iter_fault_in_readable(i, count)))
+			return -EFAULT;
 
 		/* Copy data from userspace to the current page */
-		kmap(page);
-		page_fault = __copy_from_user(page_address(page) + offset,
-					      buf, count);
+		copied = iov_iter_copy_from_user(page, i, offset, count);
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
-		kunmap(page);
-		buf += count;
-		write_bytes -= count;
+		iov_iter_advance(i, copied);
+		write_bytes -= copied;
 
-		if (page_fault)
-			break;
+		if (unlikely(copied == 0)) {
+			count = min_t(size_t, PAGE_CACHE_SIZE - offset,
+				      iov_iter_single_seg_count(i));
+			goto again;
+		}
+
+		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
+			offset += copied;
+		} else {
+			pg++;
+			offset = 0;
+		}
 	}
-	return page_fault ? -EFAULT : 0;
+	return 0;
 }
 
 /*
@@ -822,60 +832,24 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 	return 0;
 }
 
-/* Copied from read-write.c */
-static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
-{
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (!kiocbIsKicked(iocb))
-		schedule();
-	else
-		kiocbClearKicked(iocb);
-	__set_current_state(TASK_RUNNING);
-}
-
-/*
- * Just a copy of what do_sync_write does.
- */
-static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
-				    size_t count, loff_t pos, loff_t *ppos)
-{
-	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
-	unsigned long nr_segs = 1;
-	struct kiocb kiocb;
-	ssize_t ret;
-
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = pos;
-	kiocb.ki_left = count;
-	kiocb.ki_nbytes = count;
-
-	while (1) {
-		ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
-						ppos, count, count);
-		if (ret != -EIOCBRETRY)
-			break;
-		wait_on_retry_sync_kiocb(&kiocb);
-	}
-
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-	*ppos = kiocb.ki_pos;
-	return ret;
-}
-
-static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+				    const struct iovec *iov,
+				    unsigned long nr_segs, loff_t pos)
 {
-	loff_t pos;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct page *pinned[2];
+	struct page **pages = NULL;
+	struct iov_iter i;
+	loff_t *ppos = &iocb->ki_pos;
 	loff_t start_pos;
 	ssize_t num_written = 0;
 	ssize_t err = 0;
+	size_t count;
+	size_t ocount;
 	int ret = 0;
-	struct inode *inode = fdentry(file)->d_inode;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct page **pages = NULL;
 	int nrptrs;
-	struct page *pinned[2];
 	unsigned long first_index;
 	unsigned long last_index;
 	int will_write;
@@ -887,13 +861,17 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	pinned[0] = NULL;
 	pinned[1] = NULL;
 
-	pos = *ppos;
 	start_pos = pos;
 
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 	mutex_lock(&inode->i_mutex);
+
+	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+	if (err)
+		goto out;
+	count = ocount;
+
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
@@ -910,14 +888,48 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 	BTRFS_I(inode)->sequence++;
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
-		num_written = __btrfs_direct_write(file, buf, count, pos,
-						   ppos);
-		pos += num_written;
-		count -= num_written;
+		ret = btrfs_delalloc_reserve_space(inode, count);
+		if (ret)
+			goto out;
+
+		num_written = generic_file_direct_write(iocb, iov, &nr_segs,
+							pos, ppos, count,
+							ocount);
 
-		/* We've written everything we wanted to, exit */
-		if (num_written < 0 || !count)
+		/*
+		 * the generic O_DIRECT will update in-memory i_size after the
+		 * DIOs are done.  But our endio handlers that update the on
+		 * disk i_size never update past the in memory i_size.  So we
+		 * need one more update here to catch any additions to the
+		 * file
+		 */
+		if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+			btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+			mark_inode_dirty(inode);
+		}
+
+		if (num_written < 0) {
+			if (num_written != -EIOCBQUEUED) {
+				/*
+				 * aio land will take care of releasing the
+				 * delalloc
+				 */
+				btrfs_delalloc_release_space(inode, count);
+			}
+			ret = num_written;
+			num_written = 0;
 			goto out;
+		} else if (num_written == count) {
+			/* pick up pos changes done by the generic code */
+			pos = *ppos;
+			goto out;
+		}
+
+		/*
+		 * the buffered IO will reserve bytes for the rest of the
+		 * range, don't double count them here
+		 */
+		btrfs_delalloc_release_space(inode, count - num_written);
 
 		/*
 		 * We are going to do buffered for the rest of the range, so we
@@ -925,18 +937,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 		 * done.
 		 */
 		buffered = 1;
-		buf += num_written;
+		pos += num_written;
 	}
 
-	nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
-		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
+	iov_iter_init(&i, iov, nr_segs, count, num_written);
+	nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
+		     (sizeof(struct page *)));
 	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
 
 	/* generic_write_checks can change our pos */
 	start_pos = pos;
 
 	first_index = pos >> PAGE_CACHE_SHIFT;
-	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+	last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
 
 	/*
 	 * there are lots of better ways to do this, but this code
@@ -953,7 +967,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 			unlock_page(pinned[0]);
 		}
 	}
-	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
+	if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
 		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
 		if (!PageUptodate(pinned[1])) {
 			ret = btrfs_readpage(NULL, pinned[1]);
@@ -964,10 +978,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 		}
 	}
 
-	while (count > 0) {
+	while (iov_iter_count(&i) > 0) {
 		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-		size_t write_bytes = min(count, nrptrs *
-					 (size_t)PAGE_CACHE_SIZE -
+		size_t write_bytes = min(iov_iter_count(&i),
+					 nrptrs * (size_t)PAGE_CACHE_SIZE -
 					 offset);
 		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
 					PAGE_CACHE_SHIFT;
@@ -988,7 +1002,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 		}
 
 		ret = btrfs_copy_from_user(pos, num_pages,
-					   write_bytes, pages, buf);
+					   write_bytes, pages, &i);
 		if (ret == 0) {
 			dirty_and_release_pages(NULL, root, file, pages,
 						num_pages, pos, write_bytes);
@@ -1012,8 +1026,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 			btrfs_throttle(root);
 		}
 
-		buf += write_bytes;
-		count -= write_bytes;
 		pos += write_bytes;
 		num_written += write_bytes;
@@ -1206,7 +1218,7 @@ const struct file_operations btrfs_file_operations = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.splice_read	= generic_file_splice_read,
-	.write		= btrfs_file_write,
+	.aio_write	= btrfs_file_aio_write,
 	.mmap		= btrfs_file_mmap,
 	.open		= generic_file_open,
 	.release	= btrfs_release_file,
...
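Dropping .write from btrfs_file_operations does not break plain write(2): in the VFS of this era, when a file has .aio_write but no .write, vfs_write() falls back to do_sync_write(), which wraps the user buffer in a one-segment iovec, builds a synchronous kiocb and calls .aio_write; that is essentially what the deleted __btrfs_direct_write helper open-coded. Vectored writes reach btrfs_file_aio_write the same way with nr_segs > 1, and the new iov_iter-based btrfs_copy_from_user walks those segments. The sketch below is not from this commit; it is a small userspace writev(2) test of the multi-segment path, assuming a hypothetical btrfs file at /mnt/btrfs/vec-test.

/* writev_test.c - minimal sketch, not from this commit */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        /* several small segments make iov_iter cross segment boundaries
         * within a single page inside btrfs_copy_from_user */
        char a[100], b[200], c[300];
        struct iovec iov[3] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
                { .iov_base = c, .iov_len = sizeof(c) },
        };
        ssize_t n;
        int fd;

        memset(a, 'a', sizeof(a));
        memset(b, 'b', sizeof(b));
        memset(c, 'c', sizeof(c));

        fd = open("/mnt/btrfs/vec-test", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) { perror("open"); return 1; }

        /* the VFS hands this to ->aio_write as one call with nr_segs == 3 */
        n = writev(fd, iov, 3);
        if (n < 0)
                perror("writev");
        else
                printf("wrote %zd bytes\n", n);

        close(fd);
        return 0;
}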