Commit 67f2a930 authored by Linus Torvalds

Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM cache metadata to verify that a cache has blocks before
   trying to continue with operations that require them.

 - Fix bio-based DM core's dm_make_request() to properly impose device
   limits on individual bios by making use of blk_queue_split() (see
   the splitting sketch below).

 - Fix a long-standing race in which DM thinp notified userspace of
   thin-pool mode state changes before they were actually made (see
   the ordering sketch below).

 - Fix the zoned target's bio completion handling; this is a fairly
   invasive fix at this stage but it is localized to the zoned target.
   Any zoned target users will benefit from this fix (see the
   completion sketch below).
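
A rough sketch of the pattern the dm.c fix relies on, assuming a bio-based
driver on the 4.20-era block API; the my_* names are placeholders, not DM
symbols:

    #include <linux/blkdev.h>

    /* Placeholder for the driver's real per-bio processing. */
    static blk_qc_t my_process_bio(struct request_queue *q, struct bio *bio)
    {
            /* ... clone, map and submit ... */
            return BLK_QC_T_NONE;
    }

    /* Sketch of a 4.20-era bio-based ->make_request_fn. */
    static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
    {
            /*
             * Impose the queue's limits up front: blk_queue_split() may
             * trim "bio" to a size the device accepts and resubmit the
             * remainder through generic_make_request().
             */
            blk_queue_split(q, &bio);

            return my_process_bio(q, bio);
    }

The dm.c hunk at the end of this series does the equivalent at the top of
__split_and_process_bio(), just before init_clone_info().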
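
The thin-pool fix is, at heart, an ordering change: commit the new pool
mode first, then raise the table event from a single place. A minimal
sketch of that shape, using my_* placeholders rather than the real
dm-thin symbols:

    /* Illustrative only; not the dm-thin code. */
    enum my_pool_mode { MY_PM_WRITE, MY_PM_READ_ONLY, MY_PM_FAIL };

    struct my_pool {
            enum my_pool_mode mode;
    };

    /* Placeholder for dm_table_event() + DMINFO() style notification. */
    static void my_notify_mode_change(struct my_pool *pool)
    {
    }

    static void my_set_pool_mode(struct my_pool *pool, enum my_pool_mode new_mode)
    {
            enum my_pool_mode old_mode = pool->mode;

            /* ... install the bio/discard handlers for new_mode ... */

            pool->mode = new_mode;

            /*
             * Notify only after the new mode is fully in effect, and only
             * from this one place, so an event can never describe a state
             * the pool has not actually reached.
             */
            if (old_mode != new_mode)
                    my_notify_mode_change(pool);
    }

In the patch below this is done by deleting the per-case notify calls in
set_pool_mode() and emitting a single notify_of_pool_mode_change() after
pt->adjusted_pf.mode has been updated.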
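
The dm-zoned rework drops the ->end_io hook in favour of reference
counting on a per-bio context: every clone issued for a target BIO takes
a reference, each clone completion drops one, and the last drop ends the
parent. A stripped-down sketch of that pattern, with my_* stand-ins for
the dmz_* names:

    #include <linux/bio.h>
    #include <linux/refcount.h>

    /* Simplified stand-in for struct dmz_bioctx. */
    struct my_bioctx {
            struct bio *parent;     /* target BIO handed to ->map */
            refcount_t ref;         /* one per in-flight clone, plus the initial one */
    };

    /* Record the first error; complete the parent on the last reference. */
    static void my_bio_endio(struct my_bioctx *ctx, blk_status_t status)
    {
            if (status != BLK_STS_OK && ctx->parent->bi_status == BLK_STS_OK)
                    ctx->parent->bi_status = status;

            if (refcount_dec_and_test(&ctx->ref))
                    bio_endio(ctx->parent);
    }

    /* bi_end_io of a clone: propagate its status and drop its reference. */
    static void my_clone_endio(struct bio *clone)
    {
            struct my_bioctx *ctx = clone->bi_private;
            blk_status_t status = clone->bi_status;

            bio_put(clone);
            my_bio_endio(ctx, status);
    }

In this pattern each submission path takes a reference with
refcount_inc(&ctx->ref) just before generic_make_request(), and the
initial reference taken when the context is set up is dropped once all
work for the BIO has been issued, which is why the separate per-bio
status field and the ->end_io method can go away.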

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: bump target version
  dm thin: send event about thin-pool state change _after_ making it
  dm zoned: Fix target BIO completion handling
  dm: call blk_queue_split() to impose device limits on bios
  dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()
parents 14a996c3 2af6c070
@@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
bool dirty_flag;
*result = true;
if (from_cblock(cmd->cache_blocks) == 0)
/* Nothing to do */
return 0;
r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
if (r) {
......
@@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
struct dm_thin_new_mapping;
/*
* The pool runs in 4 modes. Ordered in degraded order for comparisons.
* The pool runs in various modes. Ordered in degraded order for comparisons.
*/
enum pool_mode {
PM_WRITE, /* metadata may be changed */
@@ -282,9 +282,38 @@ struct pool {
mempool_t mapping_pool;
};
static enum pool_mode get_pool_mode(struct pool *pool);
static void metadata_operation_failed(struct pool *pool, const char *op, int r);
static enum pool_mode get_pool_mode(struct pool *pool)
{
return pool->pf.mode;
}
static void notify_of_pool_mode_change(struct pool *pool)
{
const char *descs[] = {
"write",
"out-of-data-space",
"read-only",
"read-only",
"fail"
};
const char *extra_desc = NULL;
enum pool_mode mode = get_pool_mode(pool);
if (mode == PM_OUT_OF_DATA_SPACE) {
if (!pool->pf.error_if_no_space)
extra_desc = " (queue IO)";
else
extra_desc = " (error IO)";
}
dm_table_event(pool->ti->table);
DMINFO("%s: switching pool to %s%s mode",
dm_device_name(pool->pool_md),
descs[(int)mode], extra_desc ? : "");
}
/*
* Target context for a pool.
*/
@@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}
static void notify_of_pool_mode_change_to_oods(struct pool *pool);
/*
* We're holding onto IO to allow userland time to react. After the
* timeout either the pool will have been resized (and thus back in
@@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
notify_of_pool_mode_change_to_oods(pool);
notify_of_pool_mode_change(pool);
error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
@@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
/*----------------------------------------------------------------*/
static enum pool_mode get_pool_mode(struct pool *pool)
{
return pool->pf.mode;
}
static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
{
dm_table_event(pool->ti->table);
DMINFO("%s: switching pool to %s mode",
dm_device_name(pool->pool_md), new_mode);
}
static void notify_of_pool_mode_change_to_oods(struct pool *pool)
{
if (!pool->pf.error_if_no_space)
notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
else
notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
}
static bool passdown_enabled(struct pool_c *pt)
{
return pt->adjusted_pf.discard_passdown;
@@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
switch (new_mode) {
case PM_FAIL:
if (old_mode != new_mode)
notify_of_pool_mode_change(pool, "failure");
dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_fail;
pool->process_discard = process_bio_fail;
@@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY:
if (!is_read_only_pool_mode(old_mode))
notify_of_pool_mode_change(pool, "read-only");
dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_read_only;
pool->process_discard = process_bio_success;
@@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
* alarming rate. Adjust your low water mark if you're
* frequently seeing this mode.
*/
if (old_mode != new_mode)
notify_of_pool_mode_change_to_oods(pool);
pool->out_of_data_space = true;
pool->process_bio = process_bio_read_only;
pool->process_discard = process_discard_bio;
@@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
break;
case PM_WRITE:
if (old_mode != new_mode)
notify_of_pool_mode_change(pool, "write");
if (old_mode == PM_OUT_OF_DATA_SPACE)
cancel_delayed_work_sync(&pool->no_space_timeout);
pool->out_of_data_space = false;
@@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
* doesn't cause an unexpected mode transition on resume.
*/
pt->adjusted_pf.mode = new_mode;
if (old_mode != new_mode)
notify_of_pool_mode_change(pool);
}
static void abort_transaction(struct pool *pool)
@@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 20, 0},
.version = {1, 21, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
.version = {1, 20, 0},
.version = {1, 21, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
......
@@ -20,7 +20,6 @@ struct dmz_bioctx {
struct dm_zone *zone;
struct bio *bio;
refcount_t ref;
blk_status_t status;
};
/*
@@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
bioctx->status = status;
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
bio->bi_status = status;
if (refcount_dec_and_test(&bioctx->ref)) {
struct dm_zone *zone = bioctx->zone;
if (zone) {
if (bio->bi_status != BLK_STS_OK &&
bio_op(bio) == REQ_OP_WRITE &&
dmz_is_seq(zone))
set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
dmz_deactivate_zone(zone);
}
bio_endio(bio);
}
}
/*
* Partial clone read BIO completion callback. This terminates the
* Completion callback for an internally cloned target BIO. This terminates the
* target BIO when there are no more references to its context.
*/
static void dmz_read_bio_end_io(struct bio *bio)
static void dmz_clone_endio(struct bio *clone)
{
struct dmz_bioctx *bioctx = bio->bi_private;
blk_status_t status = bio->bi_status;
struct dmz_bioctx *bioctx = clone->bi_private;
blk_status_t status = clone->bi_status;
bio_put(bio);
bio_put(clone);
dmz_bio_endio(bioctx->bio, status);
}
/*
* Issue a BIO to a zone. The BIO may only partially process the
* Issue a clone of a target BIO. The clone may only partially process the
* original target BIO.
*/
static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
struct bio *bio, sector_t chunk_block,
unsigned int nr_blocks)
{
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
sector_t sector;
struct bio *clone;
/* BIO remap sector */
sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
/* If the read is not partial, there is no need to clone the BIO */
if (nr_blocks == dmz_bio_blocks(bio)) {
/* Setup and submit the BIO */
bio->bi_iter.bi_sector = sector;
refcount_inc(&bioctx->ref);
generic_make_request(bio);
return 0;
}
/* Partial BIO: we need to clone the BIO */
clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
if (!clone)
return -ENOMEM;
/* Setup the clone */
clone->bi_iter.bi_sector = sector;
bio_set_dev(clone, dmz->dev->bdev);
clone->bi_iter.bi_sector =
dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
clone->bi_end_io = dmz_read_bio_end_io;
clone->bi_end_io = dmz_clone_endio;
clone->bi_private = bioctx;
bio_advance(bio, clone->bi_iter.bi_size);
/* Submit the clone */
refcount_inc(&bioctx->ref);
generic_make_request(clone);
if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
zone->wp_block += nr_blocks;
return 0;
}
@@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
if (nr_blocks) {
/* Valid blocks found: read them */
nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
if (ret)
return ret;
chunk_block += nr_blocks;
@@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
return 0;
}
/*
* Issue a write BIO to a zone.
*/
static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
struct bio *bio, sector_t chunk_block,
unsigned int nr_blocks)
{
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
/* Setup and submit the BIO */
bio_set_dev(bio, dmz->dev->bdev);
bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
refcount_inc(&bioctx->ref);
generic_make_request(bio);
if (dmz_is_seq(zone))
zone->wp_block += nr_blocks;
}
/*
* Write blocks directly in a data zone, at the write pointer.
* If a buffer zone is assigned, invalidate the blocks written
@@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
return -EROFS;
/* Submit write */
dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
if (ret)
return ret;
/*
* Validate the blocks in the data zone and invalidate
@@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
return -EROFS;
/* Submit write */
dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
if (ret)
return ret;
/*
* Validate the blocks in the buffer zone
@@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
bioctx->zone = NULL;
bioctx->bio = bio;
refcount_set(&bioctx->ref, 1);
bioctx->status = BLK_STS_OK;
/* Set the BIO pending in the flush list */
if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
@@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
/*
* Completed target BIO processing.
*/
static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
if (bioctx->status == BLK_STS_OK && *error)
bioctx->status = *error;
if (!refcount_dec_and_test(&bioctx->ref))
return DM_ENDIO_INCOMPLETE;
/* Done */
bio->bi_status = bioctx->status;
if (bioctx->zone) {
struct dm_zone *zone = bioctx->zone;
if (*error && bio_op(bio) == REQ_OP_WRITE) {
if (dmz_is_seq(zone))
set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
}
dmz_deactivate_zone(zone);
}
return DM_ENDIO_DONE;
}
/*
* Get zoned device information.
*/
@@ -946,7 +901,6 @@ static struct target_type dmz_type = {
.ctr = dmz_ctr,
.dtr = dmz_dtr,
.map = dmz_map,
.end_io = dmz_end_io,
.io_hints = dmz_io_hints,
.prepare_ioctl = dmz_prepare_ioctl,
.postsuspend = dmz_suspend,
......
@@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
return ret;
}
blk_queue_split(md->queue, &bio);
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
......