Commit c45e8bcc authored by Linus Torvalds

Merge tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Document the DM integrity allow_discards feature that was added during
   the 5.7 merge window.

 - Fix potential for DM writecache data corruption during DM table
   reloads.

 - Fix DM verity's FEC support's hash block number calculation in
   verity_fec_decode().

 - Fix bio-based DM multipath crash due to use of stale copy of
   MPATHF_QUEUE_IO flag state in __map_bio().

* tag 'for-5.7/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm multipath: use updated MPATHF_QUEUE_IO on mapping for bio-based mpath
  dm verity fec: fix hash block number in verity_fec_decode
  dm writecache: fix data corruption when reloading the target
  dm integrity: document allow_discard option
parents 39e16d93 5686dee3
--- a/Documentation/admin-guide/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -182,12 +182,15 @@ fix_padding
 	space-efficient. If this option is not present, large padding is
 	used - that is for compatibility with older kernels.
 
-The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
-be changed when reloading the target (load an inactive table and swap the
-tables with suspend and resume). The other arguments should not be changed
-when reloading the target because the layout of disk data depend on them
-and the reloaded target would be non-functional.
+allow_discards
+	Allow block discard requests (a.k.a. TRIM) for the integrity device.
+	Discards are only allowed to devices using internal hash.
+
+The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and
+allow_discards can be changed when reloading the target (load an inactive
+table and swap the tables with suspend and resume). The other arguments
+should not be changed when reloading the target because the layout of disk
+data depend on them and the reloaded target would be non-functional.
 
 The layout of the formatted block device:
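The updated paragraph makes allow_discards one of the arguments that may be
changed across a reload. For illustration, a hedged sketch of enabling
discards on an existing integrity device (the device name, sizes, and the
remaining table arguments here are hypothetical, not taken from this commit):

    # Append allow_discards to the optional arguments; discards require
    # internal hash, per the documentation above.
    dmsetup reload ix --table "0 262144 integrity /dev/sdb1 0 4 J 2 internal_hash:crc32c allow_discards"
    dmsetup suspend ix
    dmsetup resume ix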
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -585,10 +585,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 	/* Do we need to select a new pgpath? */
 	pgpath = READ_ONCE(m->current_pgpath);
-	queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
-	if (!pgpath || !queue_io)
+	if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
 		pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 
+	/* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */
+	queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
 	if ((pgpath && queue_io) ||
 	    (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
 		/* Queue for the daemon to resubmit */
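The comment added by the fix carries the whole story: choose_pgpath() may
clear MPATHF_QUEUE_IO, so a queue_io value sampled before calling it is
stale, and __map_bio() could queue a bio that should have been mapped to the
now-ready path. A minimal userspace sketch of the two shapes (the scaffolding
is hypothetical; only the control flow mirrors __map_bio()):

    #include <stdbool.h>
    #include <stdio.h>

    struct multipath { bool queue_io; };

    /* Stand-in for choose_pgpath(): selecting a usable path clears
     * queue_io, as the real one may clear MPATHF_QUEUE_IO. */
    static int choose_pgpath(struct multipath *m)
    {
            m->queue_io = false;    /* side effect the stale snapshot misses */
            return 1;               /* pretend a valid pgpath was found */
    }

    int main(void)
    {
            struct multipath m = { .queue_io = true };
            int pgpath = 0;

            /* Buggy shape: sample the flag, then run the path selector. */
            bool stale = m.queue_io;
            if (!pgpath || !stale)
                    pgpath = choose_pgpath(&m);
            printf("stale snapshot: queue_io=%d\n", stale);     /* prints 1 */

            /* Fixed shape: re-read the flag after choose_pgpath(). */
            bool queue_io = m.queue_io;
            printf("re-read flag:   queue_io=%d\n", queue_io);  /* prints 0 */
            return 0;
    }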
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -435,7 +435,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
 	fio->level++;
 
 	if (type == DM_VERITY_BLOCK_TYPE_METADATA)
-		block += v->data_blocks;
+		block = block - v->hash_start + v->data_blocks;
 
 	/*
 	 * For RS(M, N), the continuous FEC data is divided into blocks of N
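The reasoning behind the one-line change: FEC data is computed over the data
blocks followed by the hash blocks, but hash block numbers on the hash device
start at v->hash_start, so a metadata block's position in the FEC-protected
sequence is (block - v->hash_start) + v->data_blocks. With hypothetical
numbers, say v->data_blocks = 1024 and v->hash_start = 8, hash block 10 sits
at FEC position 1024 + (10 - 8) = 1026; the old block += v->data_blocks
computed 1034 and pointed Reed-Solomon correction at the wrong block whenever
the hash area does not begin at block 0.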
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -931,6 +931,24 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
 	return 0;
 }
 
+static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors)
+{
+	struct dm_io_region region;
+	struct dm_io_request req;
+
+	region.bdev = wc->ssd_dev->bdev;
+	region.sector = wc->start_sector;
+	region.count = n_sectors;
+	req.bi_op = REQ_OP_READ;
+	req.bi_op_flags = REQ_SYNC;
+	req.mem.type = DM_IO_VMA;
+	req.mem.ptr.vma = (char *)wc->memory_map;
+	req.client = wc->dm_io;
+	req.notify.fn = NULL;
+
+	return dm_io(&req, 1, &region, NULL);
+}
+
 static void writecache_resume(struct dm_target *ti)
 {
 	struct dm_writecache *wc = ti->private;
@@ -941,8 +959,18 @@ static void writecache_resume(struct dm_target *ti)
 
 	wc_lock(wc);
 
-	if (WC_MODE_PMEM(wc))
+	if (WC_MODE_PMEM(wc)) {
 		persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+	} else {
+		r = writecache_read_metadata(wc, wc->metadata_sectors);
+		if (r) {
+			size_t sb_entries_offset;
+			writecache_error(wc, r, "unable to read metadata: %d", r);
+			sb_entries_offset = offsetof(struct wc_memory_superblock, entries);
+			memset((char *)wc->memory_map + sb_entries_offset, -1,
+			       (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset);
+		}
+	}
 
 	wc->tree = RB_ROOT;
 	INIT_LIST_HEAD(&wc->lru);
@@ -2102,6 +2130,12 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		ti->error = "Invalid block size";
 		goto bad;
 	}
+	if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) ||
+	    wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) {
+		r = -EINVAL;
+		ti->error = "Block size is smaller than device logical block size";
+		goto bad;
+	}
 	wc->block_size_bits = __ffs(wc->block_size);
 
 	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
@@ -2200,8 +2234,6 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			goto bad;
 		}
 	} else {
-		struct dm_io_region region;
-		struct dm_io_request req;
 		size_t n_blocks, n_metadata_blocks;
 		uint64_t n_bitmap_bits;
 
@@ -2258,19 +2290,9 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			goto bad;
 		}
 
-		region.bdev = wc->ssd_dev->bdev;
-		region.sector = wc->start_sector;
-		region.count = wc->metadata_sectors;
-		req.bi_op = REQ_OP_READ;
-		req.bi_op_flags = REQ_SYNC;
-		req.mem.type = DM_IO_VMA;
-		req.mem.ptr.vma = (char *)wc->memory_map;
-		req.client = wc->dm_io;
-		req.notify.fn = NULL;
-
-		r = dm_io(&req, 1, &region, NULL);
+		r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT);
 		if (r) {
-			ti->error = "Unable to read metadata";
+			ti->error = "Unable to read first block of metadata";
 			goto bad;
 		}
 	}
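A note on the writecache hunks, taken together: a device-mapper reload
constructs the new target while the old one is still live (load new table,
suspend old, resume new, destroy old), so metadata read in the constructor
can be stale by the time the new instance resumes. The dm_io read is
therefore factored out into writecache_read_metadata(); the constructor now
reads only the first block to validate the superblock, and
writecache_resume() reads the full metadata after the old instance has been
suspended, falling back to marking all entries invalid (the memset of -1) if
that read fails. The new bdev_logical_block_size() check rejects block sizes
smaller than either underlying device's logical block size, since a read
shorter than the logical block size could not be issued.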