Commit 359a5de1 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] direct-io support for XFS unwritten extents

From: Nathan Scott <nathans@sgi.com>

This patch adds a mechanism by which a filesystem can register an interest in
the completion of direct I/O.  The completion routine will be given the
inode, an offset and a length, and an optional filesystem-private field.

We have extended the use of the buffer_head-based interface (i.e.
get_block_t) for direct I/O such that the b_private field is now utilised.
It is defined to be initially zero at the start of I/O, and will be passed
into the filesystem unmodified by the VFS with each map request, while
setting up the direct I/O.  Once I/O has completed the final value of this
pointer will be passed into the filesystem's I/O completion handler.  This
mechanism can be used to keep track of all of the mapping requests which
encompass an individual direct I/O request.

This has been implemented specifically for XFS, but is done so as to be as
generic as possible.  XFS uses this mechanism to provide support for
unwritten extents - these are file extents which have been pre-allocated
on-disk, but not yet written to (once written, these become regular file
extents, but only once I/O is complete).
parent 14d927a3
......@@ -125,7 +125,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_bdev, iov, offset,
nr_segs, blkdev_get_blocks);
nr_segs, blkdev_get_blocks, NULL);
}
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
......
......@@ -15,6 +15,8 @@
* added support for non-aligned IO.
* 06Nov2002 pbadari@us.ibm.com
* added asynchronous IO support.
* 21Jul2003 nathans@sgi.com
* added IO completion notifier.
*/
#include <linux/kernel.h>
......@@ -74,6 +76,7 @@ struct dio {
int boundary; /* prev block is at a boundary */
int reap_counter; /* rate limit reaping */
get_blocks_t *get_blocks; /* block mapping function */
dio_iodone_t *end_io; /* IO completion function */
sector_t final_block_in_bio; /* current final block in bio + 1 */
sector_t next_block_for_io; /* next block to be put under IO,
in dio_blocks units */
......@@ -192,6 +195,18 @@ static struct page *dio_get_page(struct dio *dio)
return dio->pages[dio->head++];
}
/*
 * Direct I/O completion notification.
 *
 * If an end_io callback is set on this dio, invoke it with the inode,
 * the supplied offset and byte count, and the b_private pointer held in
 * the dio's map buffer_head.  That pointer lets a filesystem carry its
 * own state from its get_blocks calls through to completion.  When no
 * callback is set this function does nothing.
 */
static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
{
	void *fs_private;

	if (!dio->end_io)
		return;

	fs_private = dio->map_bh.b_private;
	dio->end_io(dio->inode, offset, bytes, fs_private);
}
/*
* Called when a BIO has been processed. If the count goes to zero then IO is
* complete and we can signal this to the AIO layer.
......@@ -199,7 +214,9 @@ static struct page *dio_get_page(struct dio *dio)
static void finished_one_bio(struct dio *dio)
{
if (atomic_dec_and_test(&dio->bio_count)) {
if(dio->is_async) {
if (dio->is_async) {
dio_complete(dio, dio->block_in_file << dio->blkbits,
dio->result);
aio_complete(dio->iocb, dio->result, 0);
kfree(dio);
}
......@@ -824,7 +841,7 @@ static int do_direct_IO(struct dio *dio)
static int
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_blocks_t get_blocks)
unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io)
{
unsigned long user_addr;
int seg;
......@@ -852,6 +869,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio->boundary = 0;
dio->reap_counter = 0;
dio->get_blocks = get_blocks;
dio->end_io = end_io;
dio->map_bh.b_private = NULL;
dio->final_block_in_bio = -1;
dio->next_block_for_io = -1;
......@@ -953,6 +972,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
if (rw == READ && (offset + ret > i_size))
ret = i_size - offset;
}
dio_complete(dio, offset, ret);
kfree(dio);
}
return ret;
......@@ -964,7 +984,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
int
blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_blocks_t get_blocks)
unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io)
{
int seg;
size_t size;
......@@ -999,7 +1019,7 @@ blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
}
retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_blocks);
nr_segs, blkbits, get_blocks, end_io);
out:
return retval;
}
......@@ -662,7 +662,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, ext2_get_blocks);
offset, nr_segs, ext2_get_blocks, NULL);
}
static int
......
......@@ -1562,7 +1562,8 @@ static int ext3_direct_IO(int rw, struct kiocb *iocb,
}
ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, ext3_direct_io_get_blocks);
offset, nr_segs,
ext3_direct_io_get_blocks, NULL);
out_stop:
if (handle) {
......
......@@ -308,7 +308,7 @@ static int jfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, jfs_get_blocks);
offset, nr_segs, jfs_get_blocks, NULL);
}
struct address_space_operations jfs_aops = {
......
......@@ -76,10 +76,10 @@ linvfs_unwritten_done(
/*
* Issue transactions to convert a buffer range from unwritten
* to written extents.
* to written extents (buffered IO).
*/
STATIC void
linvfs_unwritten_conv(
linvfs_unwritten_convert(
xfs_buf_t *bp)
{
vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
......@@ -96,6 +96,30 @@ linvfs_unwritten_conv(
pagebuf_iodone(bp, 0, 0);
}
/*
 * Direct IO completion handler: issue the request that converts the
 * byte range [offset, offset + size) from unwritten to written extents.
 *
 * A non-NULL private pointer means an unwritten extent was mapped for
 * this IO (linvfs_get_block_core stores the inode there); in that case
 * it must be the same inode that is passed in.
 */
STATIC void
linvfs_unwritten_convert_direct(
	struct inode	*inode,
	loff_t		offset,
	ssize_t		size,
	void		*private)
{
	vnode_t		*vp;
	int		error;

	ASSERT(!private || inode == (struct inode *)private);

	/* Nothing to convert: no unwritten extent flagged, or size not positive. */
	if (!private || size <= 0)
		return;

	vp = LINVFS_GET_VP(inode);

	/*
	 * NOTE(review): error is handed to VOP_BMAP but never examined
	 * afterwards; this handler returns void so a failure cannot be
	 * reported to the caller from here.
	 */
	VOP_BMAP(vp, offset, size, BMAP_UNWRITTEN, NULL, NULL, error);
}
STATIC int
map_blocks(
struct inode *inode,
......@@ -456,7 +480,7 @@ map_unwritten(
XFS_BUF_SET_SIZE(pb, size);
XFS_BUF_SET_OFFSET(pb, offset);
XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_conv);
XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pagebuf_iodone(pb, 1, 1);
......@@ -804,7 +828,7 @@ STATIC int
linvfs_get_block_core(
struct inode *inode,
sector_t iblock,
int blocks,
unsigned long blocks,
struct buffer_head *bh_result,
int create,
int direct,
......@@ -854,8 +878,11 @@ linvfs_get_block_core(
set_buffer_mapped(bh_result);
}
if (pbmap.pbm_flags & PBMF_UNWRITTEN) {
if (create)
if (create) {
if (direct)
bh_result->b_private = inode;
set_buffer_mapped(bh_result);
}
set_buffer_unwritten(bh_result);
set_buffer_delay(bh_result);
}
......@@ -935,8 +962,8 @@ linvfs_direct_IO(
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, NULL,
iov, offset, nr_segs, linvfs_get_blocks_direct);
return blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs,
linvfs_get_blocks_direct, linvfs_unwritten_convert_direct);
}
......
......@@ -219,6 +219,8 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
typedef int (get_blocks_t)(struct inode *inode, sector_t iblock,
unsigned long max_blocks,
struct buffer_head *bh_result, int create);
/*
 * Direct I/O completion callback type.  The callback receives the inode,
 * a file offset, a byte count, and an opaque filesystem-private pointer.
 */
typedef void (dio_iodone_t)(struct inode *inode, loff_t offset,
ssize_t bytes, void *private);
/*
* Attribute flags. These should be or-ed together to figure out what
......@@ -1291,7 +1293,7 @@ extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_blocks_t *get_blocks);
unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment