Commit 392aaa18 authored by Jens Axboe

[PATCH] partial bio completion notification

Make bio->bi_end_io() take bytes_done and the actual error as arguments.
This enables partial completion of bios, which is important for latency
reasons (a bio can be huge; for slow media we want page-by-page
completions).

I think I got most of the bi_end_io() functions out there, but I might
have missed a few. For the record, if you don't care about partial
completions and just want to be notified when the entire bio completes,
add a

	if (bio->bi_size)
		return 1;

to the top of your bi_end_io(). It should return 0 on completion.
bio_endio() will decrement bio->bi_size appropriately; it's recommended
that people go through it. Otherwise they would have to manage
BIO_UPTODATE and the bi_size decrement themselves, and there's really no
reason to do that. I've deliberately avoided making any functional
changes to the end_io functions, as I think that would only make the
patch more complex. It's simple right now, but these being I/O paths I
prefer (as usual) to be careful and take small steps. The
mpage_end_io_read() do-vecs-at-a-time change can come right after this,
for instance.
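
To make the convention concrete, here is a minimal sketch of an end_io
pair under the new signature (illustration only, not part of the patch;
my_end_io() and my_complete_chunk() are made-up names):

	/* completion callback that only wants whole-bio notification */
	static int my_end_io(struct bio *bio, unsigned int bytes_done, int error)
	{
		/*
		 * bio_endio() already subtracted bytes_done from
		 * bio->bi_size; nonzero bi_size means more I/O is pending
		 */
		if (bio->bi_size)
			return 1;

		/* whole bio done; BIO_UPTODATE reflects the final status */
		complete((struct completion *) bio->bi_private);
		return 0;
	}

	/* driver side: report progress chunk-by-chunk for lower latency */
	static void my_complete_chunk(struct bio *bio, int nsect, int error)
	{
		bio_endio(bio, nsect << 9, error);
	}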
parent cf780a87
@@ -1567,10 +1567,11 @@ static inline void complete_buffers(struct bio *bio, int status)
 {
 	while (bio) {
 		struct bio *xbh = bio->bi_next;
+		int nr_sectors = bio_sectors(bio);
 
 		bio->bi_next = NULL;
-		blk_finished_io(bio_sectors(bio));
-		bio_endio(bio, status);
+		blk_finished_io(nr_sectors);
+		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
 		bio = xbh;
 	}
......
@@ -911,11 +911,13 @@ static inline void complete_buffers(struct bio *bio, int ok)
 {
 	struct bio *xbh;
 
 	while (bio) {
+		int nr_sectors = bio_sectors(bio);
+
 		xbh = bio->bi_next;
 		bio->bi_next = NULL;
-		blk_finished_io(bio_sectors(bio));
-		bio_endio(bio, ok);
+		blk_finished_io(nr_sectors);
+		bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
 		bio = xbh;
 	}
......
@@ -3846,9 +3846,13 @@ static int check_floppy_change(kdev_t dev)
  * a disk in the drive, and whether that disk is writable.
  */
-static void floppy_rb0_complete(struct bio *bio)
+static int floppy_rb0_complete(struct bio *bio, unsigned int bytes_done, int err)
 {
+	if (bio->bi_size)
+		return 1;
+
 	complete((struct completion*)bio->bi_private);
+	return 0;
 }
 
 static int __floppy_read_block_0(struct block_device *bdev)
......
@@ -1576,10 +1576,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	/*
 	 * READA bit set
 	 */
-	if (bio->bi_rw & (1 << BIO_RW_AHEAD)) {
-		set_bit(BIO_RW_BLOCK, &bio->bi_flags);
+	if (bio_flagged(bio, BIO_RW_AHEAD))
 		goto end_io;
-	}
 
 	freereq = get_request_wait(q, rw);
 	spin_lock_irq(q->queue_lock);
@@ -1616,7 +1614,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	return 0;
 
 end_io:
-	bio->bi_end_io(bio);
+	bio_endio(bio, nr_sectors << 9, -EWOULDBLOCK);
 	return 0;
 }
@@ -1705,7 +1703,7 @@ void generic_make_request(struct bio *bio)
 				bdevname(bio->bi_bdev),
 				(long long) bio->bi_sector);
 end_io:
-			bio->bi_end_io(bio);
+			bio_endio(bio, 0, -EIO);
 			break;
 		}
@@ -1825,6 +1823,7 @@ int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
 	total_nsect = 0;
 	while ((bio = req->bio)) {
 		nsect = bio_iovec(bio)->bv_len >> 9;
+		total_nsect += nsect;
 
 		BIO_BUG_ON(bio_iovec(bio)->bv_len > bio->bi_size);
@@ -1834,38 +1833,31 @@ int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
 		if (unlikely(nsect > nr_sectors)) {
 			int partial = nr_sectors << 9;
 
-			bio->bi_size -= partial;
 			bio_iovec(bio)->bv_offset += partial;
 			bio_iovec(bio)->bv_len -= partial;
-			blk_recalc_rq_sectors(req, nr_sectors);
+			blk_recalc_rq_sectors(req, total_nsect);
 			blk_recalc_rq_segments(req);
+
+			bio_endio(bio, partial, !uptodate ? -EIO : 0);
 			return 1;
 		}
 
 		/*
-		 * account transfer
+		 * if bio->bi_end_io returns 0, this bio is done. move on
 		 */
-		bio->bi_size -= bio_iovec(bio)->bv_len;
-		bio->bi_idx++;
+		req->bio = bio->bi_next;
+		if (bio_endio(bio, nsect << 9, !uptodate ? -EIO : 0)) {
+			bio->bi_idx++;
+			req->bio = bio;
+		}
 
 		nr_sectors -= nsect;
-		total_nsect += nsect;
-
-		if (!bio->bi_size) {
-			req->bio = bio->bi_next;
-			bio_endio(bio, uptodate);
-			total_nsect = 0;
-		}
-
-		if ((bio = req->bio)) {
-			blk_recalc_rq_sectors(req, nsect);
 
 		/*
 		 * end more in this run, or just return 'not-done'
 		 */
 		if (unlikely(nr_sectors <= 0)) {
+			blk_recalc_rq_sectors(req, total_nsect);
+			blk_recalc_rq_segments(req);
 			return 1;
 		}
......
@@ -374,7 +374,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 	return ret;
 }
 
-static void loop_end_io_transfer(struct bio *);
+static int loop_end_io_transfer(struct bio *, unsigned int, int);
 static void loop_put_buffer(struct bio *bio)
 {
 	/*
@@ -382,6 +382,7 @@ static void loop_put_buffer(struct bio *bio)
 	 */
 	if (bio && bio->bi_end_io == loop_end_io_transfer) {
 		int i;
+
 		for (i = 0; i < bio->bi_vcnt; i++)
 			__free_page(bio->bi_io_vec[i].bv_page);
@@ -432,19 +433,23 @@ static struct bio *loop_get_bio(struct loop_device *lo)
  * bi_end_io context (we don't want to do decrypt of a page with irqs
  * disabled)
  */
-static void loop_end_io_transfer(struct bio *bio)
+static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
 {
 	struct bio *rbh = bio->bi_private;
 	struct loop_device *lo = &loop_dev[minor(to_kdev_t(rbh->bi_bdev->bd_dev))];
-	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 
-	if (!uptodate || bio_rw(bio) == WRITE) {
-		bio_endio(rbh, uptodate);
+	if (bio->bi_size)
+		return 1;
+
+	if (err || bio_rw(bio) == WRITE) {
+		bio_endio(rbh, rbh->bi_size, err);
 		if (atomic_dec_and_test(&lo->lo_pending))
 			up(&lo->lo_bh_mutex);
 		loop_put_buffer(bio);
 	} else
 		loop_add_bio(lo, bio);
+
+	return 0;
 }
 
 static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh)
@@ -553,7 +558,7 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 		up(&lo->lo_bh_mutex);
 	loop_put_buffer(new_bio);
 out:
-	bio_io_error(old_bio);
+	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
 inactive:
 	spin_unlock_irq(&lo->lo_lock);
@@ -569,13 +574,13 @@ static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
 		ret = do_bio_filebacked(lo, bio);
-		bio_endio(bio, !ret);
+		bio_endio(bio, bio->bi_size, ret);
 	} else {
 		struct bio *rbh = bio->bi_private;
 
 		ret = bio_transfer(lo, bio, rbh);
 
-		bio_endio(rbh, !ret);
+		bio_endio(rbh, rbh->bi_size, ret);
 		loop_put_buffer(bio);
 	}
 }
......
@@ -277,11 +277,10 @@ static int rd_make_request(request_queue_t * q, struct bio *sbh)
 	if (rd_blkdev_bio_IO(sbh, minor))
 		goto fail;
 
-	set_bit(BIO_UPTODATE, &sbh->bi_flags);
-	sbh->bi_end_io(sbh);
+	bio_endio(sbh, sbh->bi_size, 0);
 	return 0;
 fail:
-	bio_io_error(sbh);
+	bio_io_error(sbh, sbh->bi_size);
 	return 0;
 }
......
@@ -401,12 +401,16 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 	return 0;
 }
 
-static void bio_end_io_kio(struct bio *bio)
+static int bio_end_io_kio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
 
-	end_kio_request(kio, test_bit(BIO_UPTODATE, &bio->bi_flags));
+	if (bio->bi_size)
+		return 1;
+
+	end_kio_request(kio, !error);
 	bio_put(bio);
+	return 0;
 }
 
 /**
@@ -519,15 +523,15 @@ void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t s
 		end_kio_request(kio, !err);
 }
 
-void bio_endio(struct bio *bio, int uptodate)
+int bio_endio(struct bio *bio, unsigned int bytes_done, int error)
 {
-	if (uptodate)
+	if (!error)
 		set_bit(BIO_UPTODATE, &bio->bi_flags);
 	else
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
 
-	if (bio->bi_end_io)
-		bio->bi_end_io(bio);
+	bio->bi_size -= bytes_done;
+	return bio->bi_end_io(bio, bytes_done, error);
 }
 
 static void __init biovec_init_pools(void)
......
@@ -2413,12 +2413,16 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
 	return err ? err : transferred;
 }
 
-static void end_bio_bh_io_sync(struct bio *bio)
+static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
 {
 	struct buffer_head *bh = bio->bi_private;
 
+	if (bio->bi_size)
+		return 1;
+
 	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
 	bio_put(bio);
+	return 0;
 }
 
 int submit_bh(int rw, struct buffer_head * bh)
......
@@ -151,17 +151,21 @@ static struct page *dio_get_page(struct dio *dio)
  * During I/O bi_private points at the dio. After I/O, bi_private is used to
  * implement a singly-linked list of completed BIOs, at dio->bio_list.
  */
-static void dio_bio_end_io(struct bio *bio)
+static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct dio *dio = bio->bi_private;
 	unsigned long flags;
 
+	if (bio->bi_size)
+		return 1;
+
 	spin_lock_irqsave(&dio->bio_list_lock, flags);
 	bio->bi_private = dio->bio_list;
 	dio->bio_list = bio;
 	if (dio->waiter)
 		wake_up_process(dio->waiter);
 	spin_unlock_irqrestore(&dio->bio_list_lock, flags);
+	return 0;
 }
 
 static int
......
@@ -36,11 +36,14 @@
  * status of that page is hard. See end_buffer_async_read() for the details.
  * There is no point in duplicating all that complexity.
  */
-static void mpage_end_io_read(struct bio *bio)
+static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
+	if (bio->bi_size)
+		return 1;
+
 	do {
 		struct page *page = bvec->bv_page;
@@ -56,13 +59,17 @@ static void mpage_end_io_read(struct bio *bio)
 		unlock_page(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
+	return 0;
 }
 
-static void mpage_end_io_write(struct bio *bio)
+static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
+	if (bio->bi_size)
+		return 1;
+
 	do {
 		struct page *page = bvec->bv_page;
@@ -74,6 +81,7 @@ static void mpage_end_io_write(struct bio *bio)
 		end_page_writeback(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
+	return 0;
 }
 
 struct bio *mpage_bio_submit(int rw, struct bio *bio)
......
@@ -51,7 +51,7 @@ struct bio_vec {
 };
 
 struct bio;
-typedef void (bio_end_io_t) (struct bio *);
+typedef int (bio_end_io_t) (struct bio *, unsigned int, int);
 typedef void (bio_destructor_t) (struct bio *);
 
 /*
@@ -161,7 +161,7 @@ struct bio {
 #define BIO_SEG_BOUNDARY(q, b1, b2) \
 	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
 
-#define bio_io_error(bio) bio_endio((bio), 0)
+#define bio_io_error(bio, bytes) bio_endio((bio), (bytes), -EIO)
 
 /*
  * drivers should not use the __ version unless they _really_ want to
@@ -194,7 +194,7 @@ struct bio {
 extern struct bio *bio_alloc(int, int);
 extern void bio_put(struct bio *);
 
-extern void bio_endio(struct bio *, int);
+extern int bio_endio(struct bio *, unsigned int, int);
 struct request_queue;
 extern inline int bio_phys_segments(struct request_queue *, struct bio *);
 extern inline int bio_hw_segments(struct request_queue *, struct bio *);
......
@@ -291,12 +291,16 @@ static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 	}
 }
 
-static inline void bounce_end_io(struct bio *bio, mempool_t *pool)
+static inline int bounce_end_io(struct bio *bio, unsigned int bytes_done,
+				int error, mempool_t *pool)
 {
 	struct bio *bio_orig = bio->bi_private;
 	struct bio_vec *bvec, *org_vec;
 	int i;
 
+	if (bio->bi_size)
+		return 1;
+
 	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		goto out_eio;
@@ -314,38 +318,43 @@ static inline void bounce_end_io(struct bio *bio, mempool_t *pool)
 	}
 
 out_eio:
-	bio_orig->bi_end_io(bio_orig);
+	bio_endio(bio_orig, bytes_done, error);
 	bio_put(bio);
+	return 0;
 }
 
-static void bounce_end_io_write(struct bio *bio)
+static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,
+			       int error)
 {
-	bounce_end_io(bio, page_pool);
+	return bounce_end_io(bio, bytes_done, error, page_pool);
 }
 
-static void bounce_end_io_write_isa(struct bio *bio)
+static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done,
+				   int error)
 {
-	bounce_end_io(bio, isa_page_pool);
+	return bounce_end_io(bio, bytes_done, error, isa_page_pool);
 }
 
-static inline void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
+static inline int __bounce_end_io_read(struct bio *bio, unsigned int done,
+				       int error, mempool_t *pool)
 {
 	struct bio *bio_orig = bio->bi_private;
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		copy_to_high_bio_irq(bio_orig, bio);
 
-	bounce_end_io(bio, pool);
+	return bounce_end_io(bio, done, error, pool);
 }
 
-static void bounce_end_io_read(struct bio *bio)
+static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
 {
-	__bounce_end_io_read(bio, page_pool);
+	return __bounce_end_io_read(bio, bytes_done, err, page_pool);
 }
 
-static void bounce_end_io_read_isa(struct bio *bio)
+static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done,
+				  int err)
 {
-	return __bounce_end_io_read(bio, isa_page_pool);
+	return __bounce_end_io_read(bio, bytes_done, err, isa_page_pool);
 }
 
 void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
......
@@ -47,22 +47,29 @@ get_swap_bio(int gfp_flags, struct page *page, bio_end_io_t end_io)
 	return bio;
 }
 
-static void end_swap_bio_write(struct bio *bio)
+static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
+	if (bio->bi_size)
+		return 1;
+
 	if (!uptodate)
 		SetPageError(page);
 	end_page_writeback(page);
 	bio_put(bio);
+	return 0;
 }
 
-static void end_swap_bio_read(struct bio *bio)
+static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	struct page *page = bio->bi_io_vec[0].bv_page;
 
+	if (bio->bi_size)
+		return 1;
+
 	if (!uptodate) {
 		SetPageError(page);
 		ClearPageUptodate(page);
@@ -71,6 +78,7 @@ static void end_swap_bio_read(struct bio *bio)
 	}
 	unlock_page(page);
 	bio_put(bio);
+	return 0;
 }
 
 /*
......