Commit 332c8cf1 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] direct-io AIO fixes

From: Suparna Bhattacharya <suparna@in.ibm.com>

Fixes the following remaining issues with the DIO code:

1. During DIO file extends, intermediate writes could extend i_size
   exposing unwritten blocks to intermediate reads (Soln: Don't drop i_sem
   for file extends)

2. AIO-DIO file extends may update i_size before I/O completes,
   exposing unwritten blocks to intermediate reads.  (Soln: Force AIO-DIO
   file extends to be synchronous)

3. AIO-DIO writes to holes call aio_complete() before falling back to
   buffered I/O!  (Soln: Avoid calling aio_complete() if -ENOTBLK)

4. AIO-DIO writes to an allocated region followed by a hole fall back
   to buffered i/o without waiting for already submitted i/o to complete;
   control might return to user-space, which could then overwrite the buffer
   contents while they are still being written out by the kernel (Soln: Always
   wait for submitted i/o to complete before falling back to buffered i/o)
parent aa34baa2
...@@ -209,7 +209,7 @@ static struct page *dio_get_page(struct dio *dio) ...@@ -209,7 +209,7 @@ static struct page *dio_get_page(struct dio *dio)
*/ */
static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
{ {
if (dio->end_io) if (dio->end_io && dio->result)
dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
if (dio->needs_locking) if (dio->needs_locking)
up_read(&dio->inode->i_alloc_sem); up_read(&dio->inode->i_alloc_sem);
...@@ -225,8 +225,14 @@ static void finished_one_bio(struct dio *dio) ...@@ -225,8 +225,14 @@ static void finished_one_bio(struct dio *dio)
if (dio->is_async) { if (dio->is_async) {
dio_complete(dio, dio->block_in_file << dio->blkbits, dio_complete(dio, dio->block_in_file << dio->blkbits,
dio->result); dio->result);
/* Complete AIO later if falling back to buffered i/o */
if (dio->result != -ENOTBLK) {
aio_complete(dio->iocb, dio->result, 0); aio_complete(dio->iocb, dio->result, 0);
kfree(dio); kfree(dio);
} else {
if (dio->waiter)
wake_up_process(dio->waiter);
}
} }
} }
} }
...@@ -877,8 +883,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -877,8 +883,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
int ret2; int ret2;
size_t bytes; size_t bytes;
dio->is_async = !is_sync_kiocb(iocb);
dio->bio = NULL; dio->bio = NULL;
dio->inode = inode; dio->inode = inode;
dio->rw = rw; dio->rw = rw;
...@@ -975,10 +979,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -975,10 +979,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio_cleanup(dio); dio_cleanup(dio);
/* /*
* All new block allocations have been performed. We can let i_sem * All block lookups have been performed. For READ requests
* go now. * we can let i_sem go now that its achieved its purpose
* of protecting us from looking up uninitialized blocks.
*/ */
if (dio->needs_locking) if ((rw == READ) && dio->needs_locking)
up(&dio->inode->i_sem); up(&dio->inode->i_sem);
/* /*
...@@ -988,8 +993,31 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -988,8 +993,31 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
if (dio->is_async) { if (dio->is_async) {
if (ret == 0) if (ret == 0)
ret = dio->result; /* Bytes written */ ret = dio->result; /* Bytes written */
if (ret == -ENOTBLK) {
/*
* The request will be reissued via buffered I/O
* when we return; Any I/O already issued
* effectively becomes redundant.
*/
dio->result = ret;
dio->waiter = current;
}
finished_one_bio(dio); /* This can free the dio */ finished_one_bio(dio); /* This can free the dio */
blk_run_queues(); blk_run_queues();
if (ret == -ENOTBLK) {
/*
* Wait for already issued I/O to drain out and
* release its references to user-space pages
* before returning to fallback on buffered I/O
*/
set_current_state(TASK_UNINTERRUPTIBLE);
while (atomic_read(&dio->bio_count)) {
io_schedule();
set_current_state(TASK_UNINTERRUPTIBLE);
}
set_current_state(TASK_RUNNING);
dio->waiter = NULL;
}
} else { } else {
finished_one_bio(dio); finished_one_bio(dio);
ret2 = dio_await_completion(dio); ret2 = dio_await_completion(dio);
...@@ -1009,6 +1037,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1009,6 +1037,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
ret = i_size - offset; ret = i_size - offset;
} }
dio_complete(dio, offset, ret); dio_complete(dio, offset, ret);
/* We could have also come here on an AIO file extend */
if (!is_sync_kiocb(iocb) && (ret != -ENOTBLK))
aio_complete(iocb, ret, 0);
kfree(dio); kfree(dio);
} }
return ret; return ret;
...@@ -1035,6 +1066,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1035,6 +1066,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned bdev_blkbits = 0; unsigned bdev_blkbits = 0;
unsigned blocksize_mask = (1 << blkbits) - 1; unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL; ssize_t retval = -EINVAL;
loff_t end = offset;
struct dio *dio; struct dio *dio;
int needs_locking; int needs_locking;
...@@ -1053,6 +1085,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1053,6 +1085,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
for (seg = 0; seg < nr_segs; seg++) { for (seg = 0; seg < nr_segs; seg++) {
addr = (unsigned long)iov[seg].iov_base; addr = (unsigned long)iov[seg].iov_base;
size = iov[seg].iov_len; size = iov[seg].iov_len;
end += size;
if ((addr & blocksize_mask) || (size & blocksize_mask)) { if ((addr & blocksize_mask) || (size & blocksize_mask)) {
if (bdev) if (bdev)
blkbits = bdev_blkbits; blkbits = bdev_blkbits;
...@@ -1090,11 +1123,17 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -1090,11 +1123,17 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
down_read(&inode->i_alloc_sem); down_read(&inode->i_alloc_sem);
} }
dio->needs_locking = needs_locking; dio->needs_locking = needs_locking;
/*
* For file extending writes updating i_size before data
* writeouts complete can expose uninitialized blocks. So
* even for AIO, we need to wait for i/o to complete before
* returning in this case.
*/
dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) &&
(end > i_size_read(inode)));
retval = direct_io_worker(rw, iocb, inode, iov, offset, retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_blocks, end_io, dio); nr_segs, blkbits, get_blocks, end_io, dio);
if (needs_locking && rw == WRITE)
down(&inode->i_sem);
out: out:
return retval; return retval;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment