Commit 9b1ce7f0 authored by Damien Le Moal, committed by Jens Axboe

block: Implement zone append emulation

Given that zone write plugging manages all writes to zones of a zoned
block device and tracks the write pointer position of all zones that are
neither full nor empty, emulating zone append operations using regular
writes can be implemented generically, without relying on the underlying
device driver to implement such emulation. This is needed for devices
that do not natively support the zone append command (e.g. SMR
hard-disks).

A device may request zone append emulation by setting its
max_zone_append_sectors queue limit to 0. For such a device, the function
blk_zone_wplug_prepare_bio() changes zone append BIOs into
non-mergeable regular write BIOs. Modified zone append BIOs are flagged
with the new BIO flag BIO_EMULATES_ZONE_APPEND. This flag is checked
on completion of the BIO in blk_zone_write_plug_bio_endio() to restore
the original REQ_OP_ZONE_APPEND operation code of the BIO.

The block layer internal inline helper function bio_is_zone_append() is
added to test if a BIO is either a native zone append operation
(REQ_OP_ZONE_APPEND operation code) or if it is flagged with
BIO_EMULATES_ZONE_APPEND. Given that both native and emulated zone
append BIO completion handling should be similar, the functions
blk_update_request() and blk_zone_complete_request_bio() are modified to
use bio_is_zone_append() to execute blk_zone_update_request_bio() for
both native and emulated zone append operations.

This commit contains contributions from Christoph Hellwig <hch@lst.de>.
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Tested-by: Dennis Maisenbacher <dennis.maisenbacher@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240408014128.205141-11-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ccdbf0aa
...@@ -906,8 +906,7 @@ bool blk_update_request(struct request *req, blk_status_t error, ...@@ -906,8 +906,7 @@ bool blk_update_request(struct request *req, blk_status_t error,
if (bio_bytes == bio->bi_iter.bi_size) { if (bio_bytes == bio->bi_iter.bi_size) {
req->bio = bio->bi_next; req->bio = bio->bi_next;
} else if (req_op(req) == REQ_OP_ZONE_APPEND && } else if (bio_is_zone_append(bio) && error == BLK_STS_OK) {
error == BLK_STS_OK) {
/* /*
* Partial zone append completions cannot be supported * Partial zone append completions cannot be supported
* as the BIO fragments may end up not being written * as the BIO fragments may end up not being written
......
...@@ -689,7 +689,8 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, ...@@ -689,7 +689,8 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
while ((bio = bio_list_pop(&zwplug->bio_list))) { while ((bio = bio_list_pop(&zwplug->bio_list))) {
if (wp_offset >= zone_capacity || if (wp_offset >= zone_capacity ||
bio_offset_from_zone_start(bio) != wp_offset) { (bio_op(bio) != REQ_OP_ZONE_APPEND &&
bio_offset_from_zone_start(bio) != wp_offset)) {
blk_zone_wplug_bio_io_error(bio); blk_zone_wplug_bio_io_error(bio);
disk_put_zone_wplug(zwplug); disk_put_zone_wplug(zwplug);
continue; continue;
...@@ -951,7 +952,8 @@ static inline void disk_zone_wplug_set_error(struct gendisk *disk, ...@@ -951,7 +952,8 @@ static inline void disk_zone_wplug_set_error(struct gendisk *disk,
/* /*
* Check and prepare a BIO for submission by incrementing the write pointer * Check and prepare a BIO for submission by incrementing the write pointer
* offset of its zone write plug. * offset of its zone write plug and changing zone append operations into
* regular write when zone append emulation is needed.
*/ */
static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
struct bio *bio) struct bio *bio)
...@@ -966,13 +968,30 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, ...@@ -966,13 +968,30 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
if (zwplug->wp_offset >= disk->zone_capacity) if (zwplug->wp_offset >= disk->zone_capacity)
goto err; goto err;
/* if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
* Check for non-sequential writes early because we avoid a /*
* whole lot of error handling trouble if we don't send it off * Use a regular write starting at the current write pointer.
* to the driver. * Similarly to native zone append operations, do not allow
*/ * merging.
if (bio_offset_from_zone_start(bio) != zwplug->wp_offset) */
goto err; bio->bi_opf &= ~REQ_OP_MASK;
bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
bio->bi_iter.bi_sector += zwplug->wp_offset;
/*
* Remember that this BIO is in fact a zone append operation
* so that we can restore its operation code on completion.
*/
bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
} else {
/*
* Check for non-sequential writes early because we avoid a
* whole lot of error handling trouble if we don't send it off
* to the driver.
*/
if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
goto err;
}
/* Advance the zone write pointer offset. */ /* Advance the zone write pointer offset. */
zwplug->wp_offset += bio_sectors(bio); zwplug->wp_offset += bio_sectors(bio);
...@@ -1008,8 +1027,14 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) ...@@ -1008,8 +1027,14 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
} }
/* Conventional zones do not need write plugging. */ /* Conventional zones do not need write plugging. */
if (disk_zone_is_conv(disk, sector)) if (disk_zone_is_conv(disk, sector)) {
/* Zone append to conventional zones is not allowed. */
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
bio_io_error(bio);
return true;
}
return false; return false;
}
if (bio->bi_opf & REQ_NOWAIT) if (bio->bi_opf & REQ_NOWAIT)
gfp_mask = GFP_NOWAIT; gfp_mask = GFP_NOWAIT;
...@@ -1057,7 +1082,8 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) ...@@ -1057,7 +1082,8 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
* @bio: The BIO being submitted * @bio: The BIO being submitted
* @nr_segs: The number of physical segments of @bio * @nr_segs: The number of physical segments of @bio
* *
* Handle write and write zeroes operations using zone write plugging. * Handle write, write zeroes and zone append operations requiring emulation
* using zone write plugging.
* *
* Return true whenever @bio execution needs to be delayed through the zone * Return true whenever @bio execution needs to be delayed through the zone
* write plug. Otherwise, return false to let the submission path process * write plug. Otherwise, return false to let the submission path process
...@@ -1096,6 +1122,9 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) ...@@ -1096,6 +1122,9 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
* machinery operates at the request level, below the plug, and * machinery operates at the request level, below the plug, and
* completion of the flush sequence will go through the regular BIO * completion of the flush sequence will go through the regular BIO
* completion, which will handle zone write plugging. * completion, which will handle zone write plugging.
* Zone append operations for devices that requested emulation must
* also be plugged so that these BIOs can be changed into regular
* write BIOs.
* Zone reset, reset all and finish commands need special treatment * Zone reset, reset all and finish commands need special treatment
* to correctly track the write pointer offset of zones. These commands * to correctly track the write pointer offset of zones. These commands
* are not plugged as we do not need serialization with write * are not plugged as we do not need serialization with write
...@@ -1103,6 +1132,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) ...@@ -1103,6 +1132,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
* and finish commands when write operations are in flight. * and finish commands when write operations are in flight.
*/ */
switch (bio_op(bio)) { switch (bio_op(bio)) {
case REQ_OP_ZONE_APPEND:
if (!bdev_emulates_zone_append(bdev))
return false;
fallthrough;
case REQ_OP_WRITE: case REQ_OP_WRITE:
case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_ZEROES:
return blk_zone_wplug_handle_write(bio, nr_segs); return blk_zone_wplug_handle_write(bio, nr_segs);
...@@ -1171,6 +1204,15 @@ void blk_zone_write_plug_bio_endio(struct bio *bio) ...@@ -1171,6 +1204,15 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
/* Make sure we do not see this BIO again by clearing the plug flag. */ /* Make sure we do not see this BIO again by clearing the plug flag. */
bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
/*
* If this is a regular write emulating a zone append operation,
* restore the original operation code.
*/
if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) {
bio->bi_opf &= ~REQ_OP_MASK;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
}
/* /*
* If the BIO failed, mark the plug as having an error to trigger * If the BIO failed, mark the plug as having an error to trigger
* recovery. * recovery.
......
...@@ -421,6 +421,11 @@ static inline bool bio_zone_write_plugging(struct bio *bio) ...@@ -421,6 +421,11 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
{ {
return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING); return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
} }
/*
 * Tell whether @bio is a zone append operation: true when its operation code
 * is REQ_OP_ZONE_APPEND, or when it carries the BIO_EMULATES_ZONE_APPEND flag
 * (set on zone append BIOs that were changed into regular writes for
 * emulation).
 */
static inline bool bio_is_zone_append(struct bio *bio)
{
return bio_op(bio) == REQ_OP_ZONE_APPEND ||
bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
}
void blk_zone_write_plug_bio_merged(struct bio *bio); void blk_zone_write_plug_bio_merged(struct bio *bio);
void blk_zone_write_plug_attempt_merge(struct request *rq); void blk_zone_write_plug_attempt_merge(struct request *rq);
static inline void blk_zone_update_request_bio(struct request *rq, static inline void blk_zone_update_request_bio(struct request *rq,
...@@ -430,8 +435,9 @@ static inline void blk_zone_update_request_bio(struct request *rq, ...@@ -430,8 +435,9 @@ static inline void blk_zone_update_request_bio(struct request *rq,
* For zone append requests, the request sector indicates the location * For zone append requests, the request sector indicates the location
* at which the BIO data was written. Return this value to the BIO * at which the BIO data was written. Return this value to the BIO
* issuer through the BIO iter sector. * issuer through the BIO iter sector.
* For plugged zone writes, we need the original BIO sector so * For plugged zone writes, which include emulated zone append, we need
* that blk_zone_write_plug_bio_endio() can lookup the zone write plug. * the original BIO sector so that blk_zone_write_plug_bio_endio() can
* lookup the zone write plug.
*/ */
if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
bio->bi_iter.bi_sector = rq->__sector; bio->bi_iter.bi_sector = rq->__sector;
...@@ -468,6 +474,10 @@ static inline bool bio_zone_write_plugging(struct bio *bio) ...@@ -468,6 +474,10 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
{ {
return false; return false;
} }
/*
 * Stub variant of bio_is_zone_append(): unconditionally reports that @bio is
 * not a zone append operation.
 * NOTE(review): presumably the definition used when zoned block device
 * support is compiled out; the enclosing preprocessor conditional is not
 * visible here, so confirm against the full header.
 */
static inline bool bio_is_zone_append(struct bio *bio)
{
return false;
}
static inline void blk_zone_write_plug_bio_merged(struct bio *bio) static inline void blk_zone_write_plug_bio_merged(struct bio *bio)
{ {
} }
......
...@@ -311,6 +311,7 @@ enum { ...@@ -311,6 +311,7 @@ enum {
BIO_REMAPPED, BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
BIO_FLAG_LAST BIO_FLAG_LAST
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment