Commit ce673f63 authored by Linus Torvalds

Merge tag 'for-5.18/dm-fixes-2' of...

Merge tag 'for-5.18/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix memory corruption in DM integrity target when tag_size is less
   than digest size.

 - Fix DM multipath's historical-service-time path selector to not mix
   sched_clock() and ktime_get_ns(); use only ktime_get_ns().

 - Fix dm_io->orig_bio NULL pointer dereference in dm_zone_map_bio() due
   to 5.18 changes that overlooked DM zone's use of ->orig_bio.

 - Fix for regression that broke the use of dm_accept_partial_bio() for
   "abnormal" IO (e.g. WRITE ZEROES) that does not need duplicate bios

 - Fix DM's issuing of empty flush bio so that its size is 0.

* tag 'for-5.18/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: fix bio length of empty flush
  dm: allow dm_accept_partial_bio() for dm_io without duplicate bios
  dm zone: fix NULL pointer dereference in dm_zone_map_bio
  dm mpath: only use ktime_get_ns() in historical selector
  dm integrity: fix memory corruption when tag_size is less than digest size
parents fb649bda 92b914e2
...@@ -4399,6 +4399,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -4399,6 +4399,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
} }
if (ic->internal_hash) { if (ic->internal_hash) {
size_t recalc_tags_size;
ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) { if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue"; ti->error = "Cannot allocate workqueue";
...@@ -4412,8 +4413,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -4412,8 +4413,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = -ENOMEM; r = -ENOMEM;
goto bad; goto bad;
} }
ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block, recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size;
ic->tag_size, GFP_KERNEL); if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size)
recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size;
ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL);
if (!ic->recalc_tags) { if (!ic->recalc_tags) {
ti->error = "Cannot allocate tags for recalculating"; ti->error = "Cannot allocate tags for recalculating";
r = -ENOMEM; r = -ENOMEM;
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched/clock.h>
#define DM_MSG_PREFIX "multipath historical-service-time" #define DM_MSG_PREFIX "multipath historical-service-time"
...@@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps, ...@@ -433,7 +432,7 @@ static struct dm_path *hst_select_path(struct path_selector *ps,
{ {
struct selector *s = ps->context; struct selector *s = ps->context;
struct path_info *pi = NULL, *best = NULL; struct path_info *pi = NULL, *best = NULL;
u64 time_now = sched_clock(); u64 time_now = ktime_get_ns();
struct dm_path *ret = NULL; struct dm_path *ret = NULL;
unsigned long flags; unsigned long flags;
...@@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path, ...@@ -474,7 +473,7 @@ static int hst_start_io(struct path_selector *ps, struct dm_path *path,
static u64 path_service_time(struct path_info *pi, u64 start_time) static u64 path_service_time(struct path_info *pi, u64 start_time)
{ {
u64 sched_now = ktime_get_ns(); u64 now = ktime_get_ns();
/* if a previous disk request has finished after this IO was /* if a previous disk request has finished after this IO was
* sent to the hardware, pretend the submission happened * sent to the hardware, pretend the submission happened
...@@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time) ...@@ -483,11 +482,11 @@ static u64 path_service_time(struct path_info *pi, u64 start_time)
if (time_after64(pi->last_finish, start_time)) if (time_after64(pi->last_finish, start_time))
start_time = pi->last_finish; start_time = pi->last_finish;
pi->last_finish = sched_now; pi->last_finish = now;
if (time_before64(sched_now, start_time)) if (time_before64(now, start_time))
return 0; return 0;
return sched_now - start_time; return now - start_time;
} }
static int hst_end_io(struct path_selector *ps, struct dm_path *path, static int hst_end_io(struct path_selector *ps, struct dm_path *path,
......
...@@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, ...@@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
return 0; return 0;
} }
struct orig_bio_details {
unsigned int op;
unsigned int nr_sectors;
};
/* /*
* First phase of BIO mapping for targets with zone append emulation: * First phase of BIO mapping for targets with zone append emulation:
* check all BIO that change a zone writer pointer and change zone * check all BIO that change a zone writer pointer and change zone
* append operations into regular write operations. * append operations into regular write operations.
*/ */
static bool dm_zone_map_bio_begin(struct mapped_device *md, static bool dm_zone_map_bio_begin(struct mapped_device *md,
struct bio *orig_bio, struct bio *clone) unsigned int zno, struct bio *clone)
{ {
sector_t zsectors = blk_queue_zone_sectors(md->queue); sector_t zsectors = blk_queue_zone_sectors(md->queue);
unsigned int zno = bio_zone_no(orig_bio);
unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
/* /*
...@@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, ...@@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
WRITE_ONCE(md->zwp_offset[zno], zwp_offset); WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
} }
switch (bio_op(orig_bio)) { switch (bio_op(clone)) {
case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_FINISH:
return true; return true;
...@@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, ...@@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
* target zone. * target zone.
*/ */
clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE | clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
(orig_bio->bi_opf & (~REQ_OP_MASK)); (clone->bi_opf & (~REQ_OP_MASK));
clone->bi_iter.bi_sector = clone->bi_iter.bi_sector += zwp_offset;
orig_bio->bi_iter.bi_sector + zwp_offset;
break; break;
default: default:
DMWARN_LIMIT("Invalid BIO operation"); DMWARN_LIMIT("Invalid BIO operation");
...@@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md, ...@@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
* data written to a zone. Note that at this point, the remapped clone BIO * data written to a zone. Note that at this point, the remapped clone BIO
* may already have completed, so we do not touch it. * may already have completed, so we do not touch it.
*/ */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
struct bio *orig_bio, struct orig_bio_details *orig_bio_details,
unsigned int nr_sectors) unsigned int nr_sectors)
{ {
unsigned int zno = bio_zone_no(orig_bio);
unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
/* The clone BIO may already have been completed and failed */ /* The clone BIO may already have been completed and failed */
...@@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, ...@@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
return BLK_STS_IOERR; return BLK_STS_IOERR;
/* Update the zone wp offset */ /* Update the zone wp offset */
switch (bio_op(orig_bio)) { switch (orig_bio_details->op) {
case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET:
WRITE_ONCE(md->zwp_offset[zno], 0); WRITE_ONCE(md->zwp_offset[zno], 0);
return BLK_STS_OK; return BLK_STS_OK;
...@@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, ...@@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
* Check that the target did not truncate the write operation * Check that the target did not truncate the write operation
* emulating a zone append. * emulating a zone append.
*/ */
if (nr_sectors != bio_sectors(orig_bio)) { if (nr_sectors != orig_bio_details->nr_sectors) {
DMWARN_LIMIT("Truncated write for zone append"); DMWARN_LIMIT("Truncated write for zone append");
return BLK_STS_IOERR; return BLK_STS_IOERR;
} }
...@@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q, ...@@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q,
bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
} }
static bool dm_need_zone_wp_tracking(struct bio *orig_bio) static bool dm_need_zone_wp_tracking(struct bio *bio)
{ {
/* /*
* Special processing is not needed for operations that do not need the * Special processing is not needed for operations that do not need the
...@@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio) ...@@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
* zones and all operations that do not modify directly a sequential * zones and all operations that do not modify directly a sequential
* zone write pointer. * zone write pointer.
*/ */
if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio)) if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false; return false;
switch (bio_op(orig_bio)) { switch (bio_op(bio)) {
case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE: case REQ_OP_WRITE:
case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_FINISH:
case REQ_OP_ZONE_APPEND: case REQ_OP_ZONE_APPEND:
return bio_zone_is_seq(orig_bio); return bio_zone_is_seq(bio);
default: default:
return false; return false;
} }
...@@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) ...@@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
struct dm_target *ti = tio->ti; struct dm_target *ti = tio->ti;
struct mapped_device *md = io->md; struct mapped_device *md = io->md;
struct request_queue *q = md->queue; struct request_queue *q = md->queue;
struct bio *orig_bio = io->orig_bio;
struct bio *clone = &tio->clone; struct bio *clone = &tio->clone;
struct orig_bio_details orig_bio_details;
unsigned int zno; unsigned int zno;
blk_status_t sts; blk_status_t sts;
int r; int r;
...@@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio) ...@@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio)
* IOs that do not change a zone write pointer do not need * IOs that do not change a zone write pointer do not need
* any additional special processing. * any additional special processing.
*/ */
if (!dm_need_zone_wp_tracking(orig_bio)) if (!dm_need_zone_wp_tracking(clone))
return ti->type->map(ti, clone); return ti->type->map(ti, clone);
/* Lock the target zone */ /* Lock the target zone */
zno = bio_zone_no(orig_bio); zno = bio_zone_no(clone);
dm_zone_lock(q, zno, clone); dm_zone_lock(q, zno, clone);
orig_bio_details.nr_sectors = bio_sectors(clone);
orig_bio_details.op = bio_op(clone);
/* /*
* Check that the bio and the target zone write pointer offset are * Check that the bio and the target zone write pointer offset are
* both valid, and if the bio is a zone append, remap it to a write. * both valid, and if the bio is a zone append, remap it to a write.
*/ */
if (!dm_zone_map_bio_begin(md, orig_bio, clone)) { if (!dm_zone_map_bio_begin(md, zno, clone)) {
dm_zone_unlock(q, zno, clone); dm_zone_unlock(q, zno, clone);
return DM_MAPIO_KILL; return DM_MAPIO_KILL;
} }
...@@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) ...@@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
* The target submitted the clone BIO. The target zone will * The target submitted the clone BIO. The target zone will
* be unlocked on completion of the clone. * be unlocked on completion of the clone.
*/ */
sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
*tio->len_ptr);
break; break;
case DM_MAPIO_REMAPPED: case DM_MAPIO_REMAPPED:
/* /*
...@@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio) ...@@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
* unlock the target zone here as the clone will not be * unlock the target zone here as the clone will not be
* submitted. * submitted.
*/ */
sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr); sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
*tio->len_ptr);
if (sts != BLK_STS_OK) if (sts != BLK_STS_OK)
dm_zone_unlock(q, zno, clone); dm_zone_unlock(q, zno, clone);
break; break;
......
...@@ -1323,8 +1323,7 @@ static void __map_bio(struct bio *clone) ...@@ -1323,8 +1323,7 @@ static void __map_bio(struct bio *clone)
} }
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned num_bios, struct dm_target *ti, unsigned num_bios)
unsigned *len)
{ {
struct bio *bio; struct bio *bio;
int try; int try;
...@@ -1335,7 +1334,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, ...@@ -1335,7 +1334,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
if (try) if (try)
mutex_lock(&ci->io->md->table_devices_lock); mutex_lock(&ci->io->md->table_devices_lock);
for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
bio = alloc_tio(ci, ti, bio_nr, len, bio = alloc_tio(ci, ti, bio_nr, NULL,
try ? GFP_NOIO : GFP_NOWAIT); try ? GFP_NOIO : GFP_NOWAIT);
if (!bio) if (!bio)
break; break;
...@@ -1363,11 +1362,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, ...@@ -1363,11 +1362,11 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
break; break;
case 1: case 1:
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
__map_bio(clone); __map_bio(clone);
break; break;
default: default:
alloc_multiple_bios(&blist, ci, ti, num_bios, len); /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
alloc_multiple_bios(&blist, ci, ti, num_bios);
while ((clone = bio_list_pop(&blist))) { while ((clone = bio_list_pop(&blist))) {
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
__map_bio(clone); __map_bio(clone);
...@@ -1392,6 +1391,7 @@ static void __send_empty_flush(struct clone_info *ci) ...@@ -1392,6 +1391,7 @@ static void __send_empty_flush(struct clone_info *ci)
ci->bio = &flush_bio; ci->bio = &flush_bio;
ci->sector_count = 0; ci->sector_count = 0;
ci->io->tio.clone.bi_iter.bi_size = 0;
while ((ti = dm_table_get_target(ci->map, target_nr++))) while ((ti = dm_table_get_target(ci->map, target_nr++)))
__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
...@@ -1407,14 +1407,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target ...@@ -1407,14 +1407,10 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
len = min_t(sector_t, ci->sector_count, len = min_t(sector_t, ci->sector_count,
max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector))); max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
/* __send_duplicate_bios(ci, ti, num_bios, &len);
* dm_accept_partial_bio cannot be used with duplicate bios,
* so update clone_info cursor before __send_duplicate_bios().
*/
ci->sector += len; ci->sector += len;
ci->sector_count -= len; ci->sector_count -= len;
__send_duplicate_bios(ci, ti, num_bios, &len);
} }
static bool is_abnormal_io(struct bio *bio) static bool is_abnormal_io(struct bio *bio)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment