Commit dd469a45 authored by Linus Torvalds

Merge tag 'for-5.3/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Revert a DM bufio change from the 5.3 merge window now that a proper
   fix has been made to the block loopback driver.

 - Fix DM kcopyd to wake up so failed subjobs get completed.

 - Various fixes to the DM zoned target to address error handling, plus
   other small tweaks (SPDX license identifiers and typo fixes).

 - Fix DM integrity range locking race by tracking whether the journal
   has changed.

 - Fix DM dust target to detect reads of badblocks beyond the first
   512-byte sector (applicable if the block size is larger than 512
   bytes); see the sketch after this list.

 - Fix DM persistent-data issue in both the DM btree and DM
   space-map-metadata interfaces.

 - Fix out of bounds memory access with certain DM table configurations.
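
To make the dust fix above concrete, here is a minimal userspace sketch (assuming a
4096-byte block size) of the sector-to-block conversion that change introduces.
ffs_shift() and the values below are illustrative stand-ins, not dm-dust code; the
driver computes the same shift with __ffs() and applies it in its map path:

    /* sketch: map a 512-byte sector to a dust block index via a power-of-two shift */
    #include <stdio.h>

    static unsigned int ffs_shift(unsigned int sect_per_block)
    {
            /* equivalent of __ffs() for a power-of-two sectors-per-block count */
            unsigned int shift = 0;

            while (!(sect_per_block & 1)) {
                    sect_per_block >>= 1;
                    shift++;
            }
            return shift;
    }

    int main(void)
    {
            unsigned long long blksz = 4096;           /* assumed target block size in bytes */
            unsigned int sect_per_block = blksz / 512; /* 8 sectors per block */
            unsigned int shift = ffs_shift(sect_per_block);
            unsigned long long bio_sector = 9;         /* a sector inside block 1, not its first sector */

            /*
             * Before the fix only the first 512-byte sector of a bad block was
             * matched; shifting the incoming sector down maps every sector of
             * block 1 to badblocklist index 1.
             */
            printf("sector %llu -> block %llu\n", bio_sector, bio_sector >> shift);
            return 0;
    }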

* tag 'for-5.3/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: fix invalid memory accesses with too high sector number
  dm space map metadata: fix missing store of apply_bops() return value
  dm btree: fix order of block initialization in btree_split_beneath
  dm raid: add missing cleanup in raid_ctr()
  dm zoned: fix potential NULL dereference in dmz_do_reclaim()
  dm dust: use dust block size for badblocklist index
  dm integrity: fix a crash due to BUG_ON in __journal_read_write()
  dm zoned: fix a few typos
  dm zoned: add SPDX license identifiers
  dm zoned: properly handle backing device failure
  dm zoned: improve error handling in i/o map code
  dm zoned: improve error handling in reclaim
  dm kcopyd: always complete failed jobs
  Revert "dm bufio: fix deadlock with loop device"
parents f576518c 1cfd5d33
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1599,7 +1599,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
         unsigned long freed;
 
         c = container_of(shrink, struct dm_bufio_client, shrinker);
-        if (!dm_bufio_trylock(c))
+        if (sc->gfp_mask & __GFP_FS)
+                dm_bufio_lock(c);
+        else if (!dm_bufio_trylock(c))
                 return SHRINK_STOP;
 
         freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -25,6 +25,7 @@ struct dust_device {
         unsigned long long badblock_count;
         spinlock_t dust_lock;
         unsigned int blksz;
+        int sect_per_block_shift;
         unsigned int sect_per_block;
         sector_t start;
         bool fail_read_on_bb:1;
@@ -79,7 +80,7 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block)
         unsigned long flags;
 
         spin_lock_irqsave(&dd->dust_lock, flags);
-        bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+        bblock = dust_rb_search(&dd->badblocklist, block);
 
         if (bblock == NULL) {
                 if (!dd->quiet_mode) {
@@ -113,7 +114,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
         }
 
         spin_lock_irqsave(&dd->dust_lock, flags);
-        bblock->bb = block * dd->sect_per_block;
+        bblock->bb = block;
         if (!dust_rb_insert(&dd->badblocklist, bblock)) {
                 if (!dd->quiet_mode) {
                         DMERR("%s: block %llu already in badblocklist",
@@ -138,7 +139,7 @@ static int dust_query_block(struct dust_device *dd, unsigned long long block)
         unsigned long flags;
 
         spin_lock_irqsave(&dd->dust_lock, flags);
-        bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+        bblock = dust_rb_search(&dd->badblocklist, block);
         if (bblock != NULL)
                 DMINFO("%s: block %llu found in badblocklist", __func__, block);
         else
@@ -165,6 +166,7 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock,
         int ret = DM_MAPIO_REMAPPED;
 
         if (fail_read_on_bb) {
+                thisblock >>= dd->sect_per_block_shift;
                 spin_lock_irqsave(&dd->dust_lock, flags);
                 ret = __dust_map_read(dd, thisblock);
                 spin_unlock_irqrestore(&dd->dust_lock, flags);
@@ -195,6 +197,7 @@ static int dust_map_write(struct dust_device *dd, sector_t thisblock,
         unsigned long flags;
 
         if (fail_read_on_bb) {
+                thisblock >>= dd->sect_per_block_shift;
                 spin_lock_irqsave(&dd->dust_lock, flags);
                 __dust_map_write(dd, thisblock);
                 spin_unlock_irqrestore(&dd->dust_lock, flags);
@@ -331,6 +334,8 @@ static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         dd->blksz = blksz;
         dd->start = tmp;
 
+        dd->sect_per_block_shift = __ffs(sect_per_block);
+
         /*
          * Whether to fail a read on a "bad" block.
          * Defaults to false; enabled later by message.
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1943,7 +1943,22 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map)
                         queue_work(ic->wait_wq, &dio->work);
                         return;
                 }
+                if (journal_read_pos != NOT_FOUND)
+                        dio->range.n_sectors = ic->sectors_per_block;
                 wait_and_add_new_range(ic, &dio->range);
+                /*
+                 * wait_and_add_new_range drops the spinlock, so the journal
+                 * may have been changed arbitrarily. We need to recheck.
+                 * To simplify the code, we restrict I/O size to just one block.
+                 */
+                if (journal_read_pos != NOT_FOUND) {
+                        sector_t next_sector;
+                        unsigned new_pos = find_journal_node(ic, dio->range.logical_sector, &next_sector);
+                        if (unlikely(new_pos != journal_read_pos)) {
+                                remove_range_unlocked(ic, &dio->range);
+                                goto retry;
+                        }
+                }
         }
         spin_unlock_irq(&ic->endio_wait.lock);
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -566,8 +566,10 @@ static int run_io_job(struct kcopyd_job *job)
          * no point in continuing.
          */
         if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
-            job->master_job->write_err)
+            job->master_job->write_err) {
+                job->write_err = job->master_job->write_err;
                 return -EIO;
+        }
 
         io_job_start(job->kc->throttle);
@@ -619,6 +621,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
                         else
                                 job->read_err = 1;
                         push(&kc->complete_jobs, job);
+                        wake(kc);
                         break;
                 }
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3194,7 +3194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                  */
                 r = rs_prepare_reshape(rs);
                 if (r)
-                        return r;
+                        goto bad;
 
                 /* Reshaping ain't recovery, so disable recovery */
                 rs_setup_recovery(rs, MaxSector);
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1342,7 +1342,7 @@ void dm_table_event(struct dm_table *t)
 }
 EXPORT_SYMBOL(dm_table_event);
 
-sector_t dm_table_get_size(struct dm_table *t)
+inline sector_t dm_table_get_size(struct dm_table *t)
 {
         return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
 }
@@ -1367,6 +1367,9 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
         unsigned int l, n = 0, k = 0;
         sector_t *node;
 
+        if (unlikely(sector >= dm_table_get_size(t)))
+                return &t->targets[t->num_targets];
+
         for (l = 0; l < t->depth; l++) {
                 n = get_child(n, k);
                 node = get_node(t, l, n);
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -34,7 +35,7 @@
  * (1) Super block (1 block)
  * (2) Chunk mapping table (nr_map_blocks)
  * (3) Bitmap blocks (nr_bitmap_blocks)
- * All metadata blocks are stored in conventional zones, starting from the
+ * All metadata blocks are stored in conventional zones, starting from
  * the first conventional zone found on disk.
  */
 struct dmz_super {
@@ -233,7 +234,7 @@ void dmz_unlock_map(struct dmz_metadata *zmd)
  * Lock/unlock metadata access. This is a "read" lock on a semaphore
  * that prevents metadata flush from running while metadata are being
  * modified. The actual metadata write mutual exclusion is achieved with
- * the map lock and zone styate management (active and reclaim state are
+ * the map lock and zone state management (active and reclaim state are
  * mutually exclusive).
  */
 void dmz_lock_metadata(struct dmz_metadata *zmd)
@@ -402,15 +403,18 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
         sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
         struct bio *bio;
 
+        if (dmz_bdev_is_dying(zmd->dev))
+                return ERR_PTR(-EIO);
+
         /* Get a new block and a BIO to read it */
         mblk = dmz_alloc_mblock(zmd, mblk_no);
         if (!mblk)
-                return NULL;
+                return ERR_PTR(-ENOMEM);
 
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio) {
                 dmz_free_mblock(zmd, mblk);
-                return NULL;
+                return ERR_PTR(-ENOMEM);
         }
 
         spin_lock(&zmd->mblk_lock);
@@ -541,8 +545,8 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
         if (!mblk) {
                 /* Cache miss: read the block from disk */
                 mblk = dmz_get_mblock_slow(zmd, mblk_no);
-                if (!mblk)
-                        return ERR_PTR(-ENOMEM);
+                if (IS_ERR(mblk))
+                        return mblk;
         }
 
         /* Wait for on-going read I/O and check for error */
@@ -570,16 +574,19 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
 /*
  * Issue a metadata block write BIO.
  */
-static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
-                             unsigned int set)
+static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
+                            unsigned int set)
 {
         sector_t block = zmd->sb[set].block + mblk->no;
         struct bio *bio;
 
+        if (dmz_bdev_is_dying(zmd->dev))
+                return -EIO;
+
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio) {
                 set_bit(DMZ_META_ERROR, &mblk->state);
-                return;
+                return -ENOMEM;
         }
 
         set_bit(DMZ_META_WRITING, &mblk->state);
@@ -591,6 +598,8 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
         bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
         bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
         submit_bio(bio);
+
+        return 0;
 }
 
 /*
@@ -602,6 +611,9 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
         struct bio *bio;
         int ret;
 
+        if (dmz_bdev_is_dying(zmd->dev))
+                return -EIO;
+
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio)
                 return -ENOMEM;
@@ -659,22 +671,29 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
 {
         struct dmz_mblock *mblk;
         struct blk_plug plug;
-        int ret = 0;
+        int ret = 0, nr_mblks_submitted = 0;
 
         /* Issue writes */
         blk_start_plug(&plug);
-        list_for_each_entry(mblk, write_list, link)
-                dmz_write_mblock(zmd, mblk, set);
+        list_for_each_entry(mblk, write_list, link) {
+                ret = dmz_write_mblock(zmd, mblk, set);
+                if (ret)
+                        break;
+                nr_mblks_submitted++;
+        }
         blk_finish_plug(&plug);
 
         /* Wait for completion */
         list_for_each_entry(mblk, write_list, link) {
+                if (!nr_mblks_submitted)
+                        break;
+
                 wait_on_bit_io(&mblk->state, DMZ_META_WRITING,
                                TASK_UNINTERRUPTIBLE);
                 if (test_bit(DMZ_META_ERROR, &mblk->state)) {
                         clear_bit(DMZ_META_ERROR, &mblk->state);
                         ret = -EIO;
                 }
+                nr_mblks_submitted--;
         }
 
         /* Flush drive cache (this will also sync data) */
@@ -736,6 +755,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
          */
         dmz_lock_flush(zmd);
 
+        if (dmz_bdev_is_dying(zmd->dev)) {
+                ret = -EIO;
+                goto out;
+        }
+
         /* Get dirty blocks */
         spin_lock(&zmd->mblk_lock);
         list_splice_init(&zmd->mblk_dirty_list, &write_list);
@@ -1542,7 +1566,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
         struct dm_zone *zone;
 
         if (list_empty(&zmd->map_rnd_list))
-                return NULL;
+                return ERR_PTR(-EBUSY);
 
         list_for_each_entry(zone, &zmd->map_rnd_list, link) {
                 if (dmz_is_buf(zone))
@@ -1553,7 +1577,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
                         return dzone;
         }
 
-        return NULL;
+        return ERR_PTR(-EBUSY);
 }
 
 /*
@@ -1564,7 +1588,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
         struct dm_zone *zone;
 
         if (list_empty(&zmd->map_seq_list))
-                return NULL;
+                return ERR_PTR(-EBUSY);
 
         list_for_each_entry(zone, &zmd->map_seq_list, link) {
                 if (!zone->bzone)
@@ -1573,7 +1597,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
                         return zone;
         }
 
-        return NULL;
+        return ERR_PTR(-EBUSY);
 }
 
 /*
@@ -1628,9 +1652,13 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
         if (op != REQ_OP_WRITE)
                 goto out;
 
-        /* Alloate a random zone */
+        /* Allocate a random zone */
         dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
         if (!dzone) {
+                if (dmz_bdev_is_dying(zmd->dev)) {
+                        dzone = ERR_PTR(-EIO);
+                        goto out;
+                }
                 dmz_wait_for_free_zones(zmd);
                 goto again;
         }
@@ -1725,9 +1753,13 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
         if (bzone)
                 goto out;
 
-        /* Alloate a random zone */
+        /* Allocate a random zone */
         bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
         if (!bzone) {
+                if (dmz_bdev_is_dying(zmd->dev)) {
+                        bzone = ERR_PTR(-EIO);
+                        goto out;
+                }
                 dmz_wait_for_free_zones(zmd);
                 goto again;
         }
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -37,7 +38,7 @@ enum {
 /*
  * Number of seconds of target BIO inactivity to consider the target idle.
  */
-#define DMZ_IDLE_PERIOD	(10UL * HZ)
+#define DMZ_IDLE_PERIOD		(10UL * HZ)
 
 /*
  * Percentage of unmapped (free) random zones below which reclaim starts
@@ -134,6 +135,9 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
         set_bit(DM_KCOPYD_WRITE_SEQ, &flags);
 
         while (block < end_block) {
+                if (dev->flags & DMZ_BDEV_DYING)
+                        return -EIO;
+
                 /* Get a valid region from the source zone */
                 ret = dmz_first_valid_block(zmd, src_zone, &block);
                 if (ret <= 0)
@@ -215,7 +219,7 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
         dmz_unlock_flush(zmd);
 
-        return 0;
+        return ret;
 }
 
 /*
@@ -259,7 +263,7 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
         dmz_unlock_flush(zmd);
 
-        return 0;
+        return ret;
 }
 
 /*
@@ -312,7 +316,7 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 
         dmz_unlock_flush(zmd);
 
-        return 0;
+        return ret;
 }
 
 /*
@@ -334,7 +338,7 @@ static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 /*
  * Find a candidate zone for reclaim and process it.
  */
-static void dmz_reclaim(struct dmz_reclaim *zrc)
+static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 {
         struct dmz_metadata *zmd = zrc->metadata;
         struct dm_zone *dzone;
@@ -344,8 +348,8 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 
         /* Get a data zone */
         dzone = dmz_get_zone_for_reclaim(zmd);
-        if (!dzone)
-                return;
+        if (IS_ERR(dzone))
+                return PTR_ERR(dzone);
 
         start = jiffies;
@@ -391,13 +395,20 @@ static void dmz_reclaim(struct dmz_reclaim *zrc)
 out:
         if (ret) {
                 dmz_unlock_zone_reclaim(dzone);
-                return;
+                return ret;
         }
 
-        (void) dmz_flush_metadata(zrc->metadata);
+        ret = dmz_flush_metadata(zrc->metadata);
+        if (ret) {
+                dmz_dev_debug(zrc->dev,
+                              "Metadata flush for zone %u failed, err %d\n",
+                              dmz_id(zmd, rzone), ret);
+                return ret;
+        }
 
         dmz_dev_debug(zrc->dev, "Reclaimed zone %u in %u ms",
                       dmz_id(zmd, rzone), jiffies_to_msecs(jiffies - start));
+
+        return 0;
 }
 
 /*
@@ -427,7 +438,7 @@ static bool dmz_should_reclaim(struct dmz_reclaim *zrc)
                 return false;
 
         /*
-         * If the percentage of unmappped random zones is low,
+         * If the percentage of unmapped random zones is low,
          * reclaim even if the target is busy.
          */
         return p_unmap_rnd <= DMZ_RECLAIM_LOW_UNMAP_RND;
@@ -442,6 +453,10 @@ static void dmz_reclaim_work(struct work_struct *work)
         struct dmz_metadata *zmd = zrc->metadata;
         unsigned int nr_rnd, nr_unmap_rnd;
         unsigned int p_unmap_rnd;
+        int ret;
+
+        if (dmz_bdev_is_dying(zrc->dev))
+                return;
 
         if (!dmz_should_reclaim(zrc)) {
                 mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
@@ -471,7 +486,17 @@ static void dmz_reclaim_work(struct work_struct *work)
                       (dmz_target_idle(zrc) ? "Idle" : "Busy"),
                       p_unmap_rnd, nr_unmap_rnd, nr_rnd);
 
-        dmz_reclaim(zrc);
+        ret = dmz_do_reclaim(zrc);
+        if (ret) {
+                dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
+                if (ret == -EIO)
+                        /*
+                         * LLD might be performing some error handling sequence
+                         * at the underlying device. To not interfere, do not
+                         * attempt to schedule the next reclaim run immediately.
+                         */
+                        return;
+        }
 
         dmz_schedule_reclaim(zrc);
 }
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -133,6 +134,8 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
         refcount_inc(&bioctx->ref);
         generic_make_request(clone);
 
+        if (clone->bi_status == BLK_STS_IOERR)
+                return -EIO;
+
         if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
                 zone->wp_block += nr_blocks;
@@ -277,8 +280,8 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
         /* Get the buffer zone. One will be allocated if needed */
         bzone = dmz_get_chunk_buffer(zmd, zone);
-        if (!bzone)
-                return -ENOSPC;
+        if (IS_ERR(bzone))
+                return PTR_ERR(bzone);
 
         if (dmz_is_readonly(bzone))
                 return -EROFS;
@@ -389,6 +392,11 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 
         dmz_lock_metadata(zmd);
 
+        if (dmz->dev->flags & DMZ_BDEV_DYING) {
+                ret = -EIO;
+                goto out;
+        }
+
         /*
          * Get the data zone mapping the chunk. There may be no
          * mapping for read and discard. If a mapping is obtained,
@@ -493,6 +501,8 @@ static void dmz_flush_work(struct work_struct *work)
 
         /* Flush dirty metadata blocks */
         ret = dmz_flush_metadata(dmz->metadata);
+        if (ret)
+                dmz_dev_debug(dmz->dev, "Metadata flush failed, rc=%d\n", ret);
 
         /* Process queued flush requests */
         while (1) {
@@ -513,22 +523,24 @@ static void dmz_flush_work(struct work_struct *work)
  * Get a chunk work and start it to process a new BIO.
  * If the BIO chunk has no work yet, create one.
  */
-static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
+static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
 {
         unsigned int chunk = dmz_bio_chunk(dmz->dev, bio);
         struct dm_chunk_work *cw;
+        int ret = 0;
 
         mutex_lock(&dmz->chunk_lock);
 
         /* Get the BIO chunk work. If one is not active yet, create one */
         cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
         if (!cw) {
-                int ret;
 
                 /* Create a new chunk work */
                 cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
-                if (!cw)
+                if (unlikely(!cw)) {
+                        ret = -ENOMEM;
                         goto out;
+                }
 
                 INIT_WORK(&cw->work, dmz_chunk_work);
                 refcount_set(&cw->refcount, 0);
@@ -539,7 +551,6 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
                 ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
                 if (unlikely(ret)) {
                         kfree(cw);
-                        cw = NULL;
                         goto out;
                 }
         }
@@ -547,10 +558,38 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
         bio_list_add(&cw->bio_list, bio);
         dmz_get_chunk_work(cw);
 
+        dmz_reclaim_bio_acc(dmz->reclaim);
         if (queue_work(dmz->chunk_wq, &cw->work))
                 dmz_get_chunk_work(cw);
 out:
         mutex_unlock(&dmz->chunk_lock);
+
+        return ret;
+}
+
+/*
+ * Check the backing device availability. If it's on the way out,
+ * start failing I/O. Reclaim and metadata components also call this
+ * function to cleanly abort operation in the event of such failure.
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
+{
+        struct gendisk *disk;
+
+        if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
+                disk = dmz_dev->bdev->bd_disk;
+                if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+                        dmz_dev_warn(dmz_dev, "Backing device queue dying");
+                        dmz_dev->flags |= DMZ_BDEV_DYING;
+                } else if (disk->fops->check_events) {
+                        if (disk->fops->check_events(disk, 0) &
+                                        DISK_EVENT_MEDIA_CHANGE) {
+                                dmz_dev_warn(dmz_dev, "Backing device offline");
+                                dmz_dev->flags |= DMZ_BDEV_DYING;
+                        }
+                }
+        }
+
+        return dmz_dev->flags & DMZ_BDEV_DYING;
 }
 
 /*
@@ -564,6 +603,10 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
         sector_t sector = bio->bi_iter.bi_sector;
         unsigned int nr_sectors = bio_sectors(bio);
         sector_t chunk_sector;
+        int ret;
+
+        if (dmz_bdev_is_dying(dmz->dev))
+                return DM_MAPIO_KILL;
 
         dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
                       bio_op(bio), (unsigned long long)sector, nr_sectors,
@@ -601,8 +644,14 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
                 dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector);
 
         /* Now ready to handle this BIO */
-        dmz_reclaim_bio_acc(dmz->reclaim);
-        dmz_queue_chunk_work(dmz, bio);
+        ret = dmz_queue_chunk_work(dmz, bio);
+        if (ret) {
+                dmz_dev_debug(dmz->dev,
+                              "BIO op %d, can't process chunk %llu, err %i\n",
+                              bio_op(bio), (u64)dmz_bio_chunk(dmz->dev, bio),
+                              ret);
+                return DM_MAPIO_REQUEUE;
+        }
 
         return DM_MAPIO_SUBMITTED;
 }
@@ -855,6 +904,9 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
 {
         struct dmz_target *dmz = ti->private;
 
+        if (dmz_bdev_is_dying(dmz->dev))
+                return -ENODEV;
+
         *bdev = dmz->dev->bdev;
 
         return 0;
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Western Digital Corporation or its affiliates.
  *
@@ -56,6 +57,8 @@ struct dmz_dev {
 
         unsigned int            nr_zones;
 
+        unsigned int            flags;
+
         sector_t                zone_nr_sectors;
         unsigned int            zone_nr_sectors_shift;
@@ -67,6 +70,9 @@ struct dmz_dev {
                                          (dev)->zone_nr_sectors_shift)
 #define dmz_chunk_block(dev, b)         ((b) & ((dev)->zone_nr_blocks - 1))
 
+/* Device flags. */
+#define DMZ_BDEV_DYING          (1 << 0)
+
 /*
  * Zone descriptor.
  */
@@ -245,4 +251,9 @@ void dmz_resume_reclaim(struct dmz_reclaim *zrc);
 void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc);
 void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
 
+/*
+ * Functions defined in dm-zoned-target.c
+ */
+bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+
 #endif /* DM_ZONED_H */
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -628,39 +628,40 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
 
         new_parent = shadow_current(s);
 
+        pn = dm_block_data(new_parent);
+        size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+                sizeof(__le64) : s->info->value_type.size;
+
+        /* create & init the left block */
         r = new_block(s->info, &left);
         if (r < 0)
                 return r;
 
+        ln = dm_block_data(left);
+        nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+
+        ln->header.flags = pn->header.flags;
+        ln->header.nr_entries = cpu_to_le32(nr_left);
+        ln->header.max_entries = pn->header.max_entries;
+        ln->header.value_size = pn->header.value_size;
+        memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+        memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
+
+        /* create & init the right block */
         r = new_block(s->info, &right);
         if (r < 0) {
                 unlock_block(s->info, left);
                 return r;
         }
 
-        pn = dm_block_data(new_parent);
-        ln = dm_block_data(left);
         rn = dm_block_data(right);
-
-        nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
         nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
 
-        ln->header.flags = pn->header.flags;
-        ln->header.nr_entries = cpu_to_le32(nr_left);
-        ln->header.max_entries = pn->header.max_entries;
-        ln->header.value_size = pn->header.value_size;
-
         rn->header.flags = pn->header.flags;
         rn->header.nr_entries = cpu_to_le32(nr_right);
         rn->header.max_entries = pn->header.max_entries;
         rn->header.value_size = pn->header.value_size;
-
-        memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
         memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
-
-        size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
-                sizeof(__le64) : s->info->value_type.size;
-        memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
         memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
                nr_right * size);
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -249,7 +249,7 @@ static int out(struct sm_metadata *smm)
         }
 
         if (smm->recursion_count == 1)
-                apply_bops(smm);
+                r = apply_bops(smm);
 
         smm->recursion_count--;