Commit d2a82856 authored by Jens Axboe, committed by Linus Torvalds

[PATCH] disk barriers: core

IDE disk barrier core.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 1e11a6c0
@@ -264,6 +264,45 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
EXPORT_SYMBOL(blk_queue_make_request);
/**
* blk_queue_ordered - indicate whether this queue supports ordered writes
* @q: the request queue
* @flag: non-zero if ordered (barrier) writes are supported
*
* Description:
* For journalled file systems, doing ordered writes on a commit
* block instead of explicitly doing wait_on_buffer (which is bad
* for performance) can be a big win. Block drivers supporting this
* feature should call this function and indicate so.
*
**/
void blk_queue_ordered(request_queue_t *q, int flag)
{
if (flag)
set_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
else
clear_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_ordered);
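For illustration only (not part of this patch): a driver whose hardware can preserve ordering across barrier writes would advertise that once at queue-setup time; the mydrv name below is a placeholder.

static void mydrv_init_queue(request_queue_t *q)
{
        /* tell the block layer that ordered (barrier) writes are honoured */
        blk_queue_ordered(q, 1);
}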
/**
* blk_queue_issue_flush_fn - set function for issuing a flush
* @q: the request queue
* @iff: the function to be called to issue the flush
*
* Description:
* If a driver supports issuing a flush command, it notifies the block
* layer of that capability through this call.
*
**/
void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
{
q->issue_flush_fn = iff;
}
EXPORT_SYMBOL(blk_queue_issue_flush_fn);
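Again purely illustrative, assuming a hypothetical mydrv_issue_flush() that knows how to flush the device's write cache; the driver registers it so blkdev_issue_flush() below can reach it.

static int mydrv_issue_flush(request_queue_t *q, struct gendisk *disk,
                             sector_t *error_sector)
{
        /* start a cache flush and wait for it; on failure, store the
         * failing sector in *error_sector if the caller supplied one */
        return 0;
}

static void mydrv_register_flush(request_queue_t *q)
{
        blk_queue_issue_flush_fn(q, mydrv_issue_flush);
}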
/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
@@ -1927,10 +1966,11 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
}
rq->flags |= REQ_NOMERGE;
rq->waiting = &wait;
if (!rq->waiting)
rq->waiting = &wait;
elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
generic_unplug_device(q);
wait_for_completion(&wait);
wait_for_completion(rq->waiting);
rq->waiting = NULL;
if (rq->errors)
@@ -1941,6 +1981,72 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
EXPORT_SYMBOL(blk_execute_rq);
/**
* blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for
* @error_sector: where to return the failing sector on error, if desired
*
* Description:
* Issue a flush for the block device in question. Caller can supply
* room for storing the error offset in case of a flush error, if they
* wish to. Caller must run wait_for_completion() on its own.
*/
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
request_queue_t *q;
if (bdev->bd_disk == NULL)
return -ENXIO;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!q->issue_flush_fn)
return -EOPNOTSUPP;
return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
}
EXPORT_SYMBOL(blkdev_issue_flush);
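A caller-side sketch (not from this patch), assuming the caller holds a struct block_device, e.g. a filesystem's sb->s_bdev; it falls back quietly when the device offers no flush support.

static int example_flush(struct block_device *bdev)
{
        sector_t bad_sector = 0;
        int err = blkdev_issue_flush(bdev, &bad_sector);

        if (err == -EOPNOTSUPP)
                return 0;       /* no flush support; caller must wait on I/O instead */
        if (err)
                printk("flush error near sector %llu\n",
                       (unsigned long long) bad_sector);
        return err;
}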
/**
* blkdev_scsi_issue_flush_fn - issue flush for SCSI devices
* @q: device queue
* @disk: gendisk
* @error_sector: error offset
*
* Description:
* Devices understanding the SCSI command set can use this function as
* a helper for issuing a cache flush. Note: the driver is required to store
* the error offset (in case of a flush error) in ->sector of struct
* request.
*/
int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
sector_t *error_sector)
{
struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
int ret;
rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
rq->sector = 0;
memset(rq->cmd, 0, sizeof(rq->cmd));
rq->cmd[0] = 0x35;	/* SYNCHRONIZE_CACHE */
rq->cmd_len = 12;
rq->data = NULL;
rq->data_len = 0;
rq->timeout = 60 * HZ;
ret = blk_execute_rq(q, disk, rq);
if (ret && error_sector)
*error_sector = rq->sector;
blk_put_request(rq);
return ret;
}
EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn);
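Drivers whose hardware understands the SCSI command set can, in principle, plug this helper in directly instead of writing their own callback; a minimal sketch (the queue-setup name is hypothetical).

static void scsidrv_init_queue(request_queue_t *q)
{
        blk_queue_issue_flush_fn(q, blkdev_scsi_issue_flush_fn);
}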
void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
{
int rw = rq_data_dir(rq);
@@ -2194,7 +2300,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
static int __make_request(request_queue_t *q, struct bio *bio)
{
struct request *req, *freereq = NULL;
int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra;
int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
sector_t sector;
sector = bio->bi_sector;
@@ -2212,9 +2318,11 @@ static int __make_request(request_queue_t *q, struct bio *bio)
spin_lock_prefetch(q->queue_lock);
barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw);
ra = bio->bi_rw & (1 << BIO_RW_AHEAD);
barrier = bio_barrier(bio);
if (barrier && !(q->queue_flags & (1 << QUEUE_FLAG_ORDERED))) {
err = -EOPNOTSUPP;
goto end_io;
}
again:
spin_lock_irq(q->queue_lock);
@@ -2294,7 +2402,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
/*
* READA bit set
*/
if (ra)
err = -EWOULDBLOCK;
if (bio_rw_ahead(bio))
goto end_io;
freereq = get_request_wait(q, rw);
@@ -2305,10 +2414,9 @@ static int __make_request(request_queue_t *q, struct bio *bio)
req->flags |= REQ_CMD;
/*
* inherit FAILFAST from bio and don't stack up
* retries for read ahead
* inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
*/
if (ra || test_bit(BIO_RW_FAILFAST, &bio->bi_rw))
if (bio_rw_ahead(bio) || bio_failfast(bio))
req->flags |= REQ_FAILFAST;
/*
@@ -2342,7 +2450,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
return 0;
end_io:
bio_endio(bio, nr_sectors << 9, -EWOULDBLOCK);
bio_endio(bio, nr_sectors << 9, err);
return 0;
}
@@ -2649,9 +2757,16 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
static int __end_that_request_first(struct request *req, int uptodate,
int nr_bytes)
{
int total_bytes, bio_nbytes, error = 0, next_idx = 0;
int total_bytes, bio_nbytes, error, next_idx = 0;
struct bio *bio;
/*
* extend uptodate bool to allow < 0 value to be direct io error
*/
error = 0;
if (end_io_error(uptodate))
error = !uptodate ? -EIO : uptodate;
/*
* for a REQ_BLOCK_PC request, we want to carry any eventual
* sense key with us all the way through
@@ -2660,7 +2775,6 @@ static int __end_that_request_first(struct request *req, int uptodate,
req->errors = 0;
if (!uptodate) {
error = -EIO;
if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
printk("end_request: I/O error, dev %s, sector %llu\n",
req->rq_disk ? req->rq_disk->disk_name : "?",
@@ -2743,7 +2857,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
/**
* end_that_request_first - end I/O on a request
* @req: the request being processed
* @uptodate: 0 for I/O error
* @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
* @nr_sectors: number of sectors to end I/O on
*
* Description:
@@ -2764,7 +2878,7 @@ EXPORT_SYMBOL(end_that_request_first);
/**
* end_that_request_chunk - end I/O on a request
* @req: the request being processed
* @uptodate: 0 for I/O error
* @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
* @nr_bytes: number of bytes to complete
*
* Description:
......
@@ -2756,21 +2756,31 @@ static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
if (bio->bi_size)
return 1;
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio_put(bio);
return 0;
}
void submit_bh(int rw, struct buffer_head * bh)
int submit_bh(int rw, struct buffer_head * bh)
{
struct bio *bio;
int ret = 0;
BUG_ON(!buffer_locked(bh));
BUG_ON(!buffer_mapped(bh));
BUG_ON(!bh->b_end_io);
/* Only clear out a write error when rewriting */
if (test_set_buffer_req(bh) && rw == WRITE)
if (buffer_ordered(bh) && (rw == WRITE))
rw = WRITE_BARRIER;
/*
* Only clear out a write error when rewriting, should this
* include WRITE_SYNC as well?
*/
if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
clear_buffer_write_io_error(bh);
/*
@@ -2792,7 +2802,14 @@ void submit_bh(int rw, struct buffer_head * bh)
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
bio_get(bio);
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
bio_put(bio);
return ret;
}
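A filesystem-side sketch (not part of this patch) of how the new ordered buffer flag and the submit_bh() return value might be combined when writing a commit block, retrying as a plain write if the device rejects barriers; end_buffer_write_sync() is the stock synchronous completion handler.

static int write_commit_block(struct buffer_head *bh)
{
        int ret;

        lock_buffer(bh);
        set_buffer_ordered(bh);
        bh->b_end_io = end_buffer_write_sync;
        get_bh(bh);
        ret = submit_bh(WRITE, bh);
        wait_on_buffer(bh);

        if (ret == -EOPNOTSUPP) {
                /* device rejected the barrier: retry as an ordinary write */
                clear_buffer_ordered(bh);
                lock_buffer(bh);
                bh->b_end_io = end_buffer_write_sync;
                get_bh(bh);
                ret = submit_bh(WRITE, bh);
                wait_on_buffer(bh);
        }
        return ret;
}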
/**
......
@@ -121,6 +121,7 @@ struct bio {
#define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define BIO_EOPNOTSUPP 7 /* not supported */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/*
@@ -160,6 +161,8 @@ struct bio {
#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio)))
#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER))
#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC))
#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
/*
* will die
......
@@ -195,6 +195,8 @@ enum rq_flag_bits {
__REQ_PM_SUSPEND, /* suspend request */
__REQ_PM_RESUME, /* resume request */
__REQ_PM_SHUTDOWN, /* shutdown request */
__REQ_BAR_PREFLUSH, /* barrier pre-flush done */
__REQ_BAR_POSTFLUSH, /* barrier post-flush */
__REQ_NR_BITS, /* stops here */
};
@@ -220,6 +222,8 @@ enum rq_flag_bits {
#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND)
#define REQ_PM_RESUME (1 << __REQ_PM_RESUME)
#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN)
#define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH)
#define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH)
/*
* State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -248,6 +252,7 @@ typedef void (unplug_fn) (request_queue_t *);
struct bio_vec;
typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
typedef void (activity_fn) (void *data, int rw);
typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
enum blk_queue_state {
Queue_down,
@@ -290,6 +295,7 @@ struct request_queue
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
activity_fn *activity_fn;
issue_flush_fn *issue_flush_fn;
/*
* Auto-unplugging state
@@ -373,6 +379,7 @@ struct request_queue
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */
#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */
#define QUEUE_FLAG_ORDERED 8 /* supports ordered writes */
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
@@ -390,6 +397,10 @@ struct request_queue
#define blk_pm_request(rq) \
((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH)
#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH)
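The pre/post-flush bits are intended for drivers that sequence a cache flush around the barrier write themselves; the fragment below is a hypothetical sketch of such a dispatch path (the mydrv_* helpers are placeholders, not an API introduced here).

static void mydrv_start_cache_flush(request_queue_t *q, struct request *rq);
static void mydrv_start_write(request_queue_t *q, struct request *rq);

static void mydrv_dispatch(request_queue_t *q, struct request *rq)
{
        if (blk_barrier_rq(rq) && !blk_barrier_preflush(rq)) {
                /* flush the write cache before the barrier write proper */
                rq->flags |= REQ_BAR_PREFLUSH;
                mydrv_start_cache_flush(q, rq);
                return;
        }
        mydrv_start_write(q, rq);
}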
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) ((rq)->flags & 1)
@@ -560,6 +571,14 @@ extern void end_that_request_last(struct request *);
extern int process_that_request_first(struct request *, unsigned int);
extern void end_request(struct request *req, int uptodate);
/*
* end_that_request_first/chunk() takes an uptodate argument. we account
* any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
* any other < 0 value is the direct error type. An uptodate value of
* 1 indicates successful io completion
*/
#define end_io_error(uptodate) (unlikely((uptodate) <= 0))
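As an example of this convention (not taken from this patch), a driver completing a request could translate its internal error code into the uptodate argument like so:

static void mydrv_end_request(struct request *rq, int error)
{
        /* 1 = success, 0 = plain -EIO, < 0 = specific error code */
        int uptodate = error ? error : 1;

        if (!end_that_request_first(rq, uptodate, rq->hard_nr_sectors))
                end_that_request_last(rq);
}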
static inline void blkdev_dequeue_request(struct request *req)
{
BUG_ON(list_empty(&req->queuelist));
@@ -588,6 +607,9 @@ extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(request_queue_t *, int);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern void blk_queue_ordered(request_queue_t *, int);
extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *);
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
@@ -616,6 +638,7 @@ extern long blk_congestion_wait(int rw, long timeout);
extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
extern void blk_rq_prep_restart(struct request *);
extern int blkdev_issue_flush(struct block_device *, sector_t *);
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
......
@@ -26,6 +26,7 @@ enum bh_state_bits {
BH_Delay, /* Buffer is not yet allocated on disk */
BH_Boundary, /* Block is followed by a discontiguity */
BH_Write_EIO, /* I/O error on write */
BH_Ordered, /* ordered write */
BH_PrivateStart,/* not a state bit, but the first bit available
* for private allocation by other entities
@@ -110,7 +111,8 @@ BUFFER_FNS(Async_Read, async_read)
BUFFER_FNS(Async_Write, async_write)
BUFFER_FNS(Delay, delay)
BUFFER_FNS(Boundary, boundary)
BUFFER_FNS(Write_EIO,write_io_error)
BUFFER_FNS(Write_EIO, write_io_error)
BUFFER_FNS(Ordered, ordered)
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
#define touch_buffer(bh) mark_page_accessed(bh->b_page)
@@ -173,7 +175,7 @@ void FASTCALL(unlock_buffer(struct buffer_head *bh));
void FASTCALL(__lock_buffer(struct buffer_head *bh));
void ll_rw_block(int, int, struct buffer_head * bh[]);
void sync_dirty_buffer(struct buffer_head *bh);
void submit_bh(int, struct buffer_head *);
int submit_bh(int, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
......
@@ -88,6 +88,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
#define SPECIAL 4 /* For non-blockdevice requests in request queue */
#define READ_SYNC (READ | (1 << BIO_RW_SYNC))
#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
#define SEL_IN 1
#define SEL_OUT 2
......