Commit 4ed7bdc1 authored by Linus Torvalds

Merge tag 'for-4.21/dm-changes' of...

Merge tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Eliminate a couple indirect calls from bio-based DM core.

 - Fix DM to allow reads that exceed readahead limits by setting
   io_pages in the backing_dev_info.

 - A couple code cleanups in request-based DM.

 - Fix various DM targets to check for device sector overflow if
   CONFIG_LBDAF is not set.

 - Use u64 instead of sector_t to store iv_offset in DM crypt; sector_t
   isn't large enough on 32bit when CONFIG_LBDAF is not set.

 - Performance fixes to DM's kcopyd and the snapshot target focused on
   limiting memory use and workqueue stalls.

 - Fix typos in the integrity and writecache targets.

 - Log which algorithm is used for dm-crypt's encryption and
   dm-integrity's hashing.

 - Fix false -EBUSY errors in DM raid target's handling of check/repair
   messages.

 - Fix DM flakey target's corrupt_bio_byte feature to reliably corrupt
   the Nth byte in a bio's payload.

* tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: do not allow readahead to limit IO size
  dm raid: fix false -EBUSY when handling check/repair message
  dm rq: cleanup leftover code from recently removed q->mq_ops branching
  dm verity: log the hash algorithm implementation
  dm crypt: log the encryption algorithm implementation
  dm integrity: fix spelling mistake in workqueue name
  dm flakey: Properly corrupt multi-page bios.
  dm: Check for device sector overflow if CONFIG_LBDAF is not set
  dm crypt: use u64 instead of sector_t to store iv_offset
  dm kcopyd: Fix bug causing workqueue stalls
  dm snapshot: Fix excessive memory usage and workqueue stalls
  dm bufio: update comment in dm-bufio.c
  dm writecache: fix typo in error msg for creating writecache_flush_thread
  dm: remove indirect calls from __send_changing_extent_only()
  dm mpath: only flush workqueue when needed
  dm rq: remove unused arguments from rq_completed()
  dm: avoid indirect call in __dm_make_request
parents 5d24ae67 c6d6e9b0
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
/* /*
* Linking of buffers: * Linking of buffers:
* All buffers are linked to cache_hash with their hash_list field. * All buffers are linked to buffer_tree with their node field.
* *
* Clean buffers that are not being written (B_WRITING not set) * Clean buffers that are not being written (B_WRITING not set)
* are linked to lru[LIST_CLEAN] with their lru_list field. * are linked to lru[LIST_CLEAN] with their lru_list field.
...@@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b) ...@@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b)
} }
/* /*
* Link buffer to the hash list and clean or dirty queue. * Link buffer to the buffer tree and clean or dirty queue.
*/ */
static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
{ {
...@@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) ...@@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
} }
/* /*
* Unlink buffer from the hash list and dirty or clean queue. * Unlink buffer from the buffer tree and dirty or clean queue.
*/ */
static void __unlink_buffer(struct dm_buffer *b) static void __unlink_buffer(struct dm_buffer *b)
{ {
...@@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, ...@@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
/* /*
* We've had a period where the mutex was unlocked, so need to * We've had a period where the mutex was unlocked, so need to
* recheck the hash table. * recheck the buffer tree.
*/ */
b = __find(c, block); b = __find(c, block);
if (b) { if (b) {
...@@ -1327,7 +1327,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) ...@@ -1327,7 +1327,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
/* /*
* Use dm-io to send and empty barrier flush the device. * Use dm-io to send an empty barrier to flush the device.
*/ */
int dm_bufio_issue_flush(struct dm_bufio_client *c) int dm_bufio_issue_flush(struct dm_bufio_client *c)
{ {
...@@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); ...@@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
* Then, we write the buffer to the original location if it was dirty. * Then, we write the buffer to the original location if it was dirty.
* *
* Then, if we are the only one who is holding the buffer, relink the buffer * Then, if we are the only one who is holding the buffer, relink the buffer
* in the hash queue for the new location. * in the buffer tree for the new location.
* *
* If there was someone else holding the buffer, we write it to the new * If there was someone else holding the buffer, we write it to the new
* location but not relink it, because that other user needs to have the buffer * location but not relink it, because that other user needs to have the buffer
......
...@@ -49,7 +49,7 @@ struct convert_context { ...@@ -49,7 +49,7 @@ struct convert_context {
struct bio *bio_out; struct bio *bio_out;
struct bvec_iter iter_in; struct bvec_iter iter_in;
struct bvec_iter iter_out; struct bvec_iter iter_out;
sector_t cc_sector; u64 cc_sector;
atomic_t cc_pending; atomic_t cc_pending;
union { union {
struct skcipher_request *req; struct skcipher_request *req;
...@@ -81,7 +81,7 @@ struct dm_crypt_request { ...@@ -81,7 +81,7 @@ struct dm_crypt_request {
struct convert_context *ctx; struct convert_context *ctx;
struct scatterlist sg_in[4]; struct scatterlist sg_in[4];
struct scatterlist sg_out[4]; struct scatterlist sg_out[4];
sector_t iv_sector; u64 iv_sector;
}; };
struct crypt_config; struct crypt_config;
...@@ -160,7 +160,7 @@ struct crypt_config { ...@@ -160,7 +160,7 @@ struct crypt_config {
struct iv_lmk_private lmk; struct iv_lmk_private lmk;
struct iv_tcw_private tcw; struct iv_tcw_private tcw;
} iv_gen_private; } iv_gen_private;
sector_t iv_offset; u64 iv_offset;
unsigned int iv_size; unsigned int iv_size;
unsigned short int sector_size; unsigned short int sector_size;
unsigned char sector_shift; unsigned char sector_shift;
...@@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode) ...@@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode)
} }
} }
/*
* dm-crypt performance can vary greatly depending on which crypto
* algorithm implementation is used. Help people debug performance
* problems by logging the ->cra_driver_name.
*/
DMINFO("%s using implementation \"%s\"", ciphermode,
crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
return 0; return 0;
} }
...@@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode) ...@@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode)
return err; return err;
} }
DMINFO("%s using implementation \"%s\"", ciphermode,
crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
return 0; return 0;
} }
...@@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
} }
ret = -EINVAL; ret = -EINVAL;
if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector"; ti->error = "Invalid device sector";
goto bad; goto bad;
} }
......
...@@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a ...@@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a
unsigned long long tmpll; unsigned long long tmpll;
char dummy; char dummy;
if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector"; ti->error = "Invalid device sector";
return -EINVAL; return -EINVAL;
} }
......
...@@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
devname = dm_shift_arg(&as); devname = dm_shift_arg(&as);
r = -EINVAL; r = -EINVAL;
if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
ti->error = "Invalid device sector"; ti->error = "Invalid device sector";
goto bad; goto bad;
} }
...@@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) ...@@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
{ {
unsigned bio_bytes = bio_cur_bytes(bio); unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1;
char *data = bio_data(bio);
struct bvec_iter iter;
struct bio_vec bvec;
if (!bio_has_data(bio))
return;
/* /*
* Overwrite the Nth byte of the data returned. * Overwrite the Nth byte of the bio's data, on whichever page
* it falls.
*/ */
if (data && bio_bytes >= fc->corrupt_bio_byte) { bio_for_each_segment(bvec, bio, iter) {
data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
char *segment = (page_address(bio_iter_page(bio, iter))
DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + bio_iter_offset(bio, iter));
"(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", segment[corrupt_bio_byte] = fc->corrupt_bio_value;
bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
(unsigned long long)bio->bi_iter.bi_sector, bio_bytes); bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
(unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size);
break;
}
corrupt_bio_byte -= bio_iter_len(bio, iter);
} }
} }
......
...@@ -3460,7 +3460,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -3460,7 +3460,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->error = "Recalculate is only valid with internal hash"; ti->error = "Recalculate is only valid with internal hash";
goto bad; goto bad;
} }
ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1); ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) { if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue"; ti->error = "Cannot allocate workqueue";
r = -ENOMEM; r = -ENOMEM;
......
...@@ -56,15 +56,17 @@ struct dm_kcopyd_client { ...@@ -56,15 +56,17 @@ struct dm_kcopyd_client {
atomic_t nr_jobs; atomic_t nr_jobs;
/* /*
* We maintain three lists of jobs: * We maintain four lists of jobs:
* *
* i) jobs waiting for pages * i) jobs waiting for pages
* ii) jobs that have pages, and are waiting for the io to be issued. * ii) jobs that have pages, and are waiting for the io to be issued.
* iii) jobs that have completed. * iii) jobs that don't need to do any IO and just run a callback
* iv) jobs that have completed.
* *
* All three of these are protected by job_lock. * All four of these are protected by job_lock.
*/ */
spinlock_t job_lock; spinlock_t job_lock;
struct list_head callback_jobs;
struct list_head complete_jobs; struct list_head complete_jobs;
struct list_head io_jobs; struct list_head io_jobs;
struct list_head pages_jobs; struct list_head pages_jobs;
...@@ -625,6 +627,7 @@ static void do_work(struct work_struct *work) ...@@ -625,6 +627,7 @@ static void do_work(struct work_struct *work)
struct dm_kcopyd_client *kc = container_of(work, struct dm_kcopyd_client *kc = container_of(work,
struct dm_kcopyd_client, kcopyd_work); struct dm_kcopyd_client, kcopyd_work);
struct blk_plug plug; struct blk_plug plug;
unsigned long flags;
/* /*
* The order that these are called is *very* important. * The order that these are called is *very* important.
...@@ -633,6 +636,10 @@ static void do_work(struct work_struct *work) ...@@ -633,6 +636,10 @@ static void do_work(struct work_struct *work)
* list. io jobs call wake when they complete and it all * list. io jobs call wake when they complete and it all
* starts again. * starts again.
*/ */
spin_lock_irqsave(&kc->job_lock, flags);
list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
spin_unlock_irqrestore(&kc->job_lock, flags);
blk_start_plug(&plug); blk_start_plug(&plug);
process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->complete_jobs, kc, run_complete_job);
process_jobs(&kc->pages_jobs, kc, run_pages_job); process_jobs(&kc->pages_jobs, kc, run_pages_job);
...@@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job) ...@@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job)
struct dm_kcopyd_client *kc = job->kc; struct dm_kcopyd_client *kc = job->kc;
atomic_inc(&kc->nr_jobs); atomic_inc(&kc->nr_jobs);
if (unlikely(!job->source.count)) if (unlikely(!job->source.count))
push(&kc->complete_jobs, job); push(&kc->callback_jobs, job);
else if (job->pages == &zero_page_list) else if (job->pages == &zero_page_list)
push(&kc->io_jobs, job); push(&kc->io_jobs, job);
else else
...@@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) ...@@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
job->read_err = read_err; job->read_err = read_err;
job->write_err = write_err; job->write_err = write_err;
push(&kc->complete_jobs, job); push(&kc->callback_jobs, job);
wake(kc); wake(kc);
} }
EXPORT_SYMBOL(dm_kcopyd_do_callback); EXPORT_SYMBOL(dm_kcopyd_do_callback);
...@@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro ...@@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
spin_lock_init(&kc->job_lock); spin_lock_init(&kc->job_lock);
INIT_LIST_HEAD(&kc->callback_jobs);
INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->complete_jobs);
INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs); INIT_LIST_HEAD(&kc->pages_jobs);
...@@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) ...@@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
/* Wait for completion of all jobs submitted by this client. */ /* Wait for completion of all jobs submitted by this client. */
wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
BUG_ON(!list_empty(&kc->callback_jobs));
BUG_ON(!list_empty(&kc->complete_jobs)); BUG_ON(!list_empty(&kc->complete_jobs));
BUG_ON(!list_empty(&kc->io_jobs)); BUG_ON(!list_empty(&kc->io_jobs));
BUG_ON(!list_empty(&kc->pages_jobs)); BUG_ON(!list_empty(&kc->pages_jobs));
......
...@@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
} }
ret = -EINVAL; ret = -EINVAL;
if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
ti->error = "Invalid device sector"; ti->error = "Invalid device sector";
goto bad; goto bad;
} }
......
...@@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m) ...@@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m)
set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
smp_mb__after_atomic(); smp_mb__after_atomic();
flush_workqueue(kmpath_handlerd); if (atomic_read(&m->pg_init_in_progress))
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m); multipath_wait_for_pg_init_completion(m);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
smp_mb__after_atomic(); smp_mb__after_atomic();
} }
flush_workqueue(kmultipathd); if (m->queue_mode == DM_TYPE_BIO_BASED)
flush_work(&m->process_queued_bios);
flush_work(&m->trigger_event); flush_work(&m->trigger_event);
} }
......
...@@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv, ...@@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev); md_reap_sync_thread(mddev);
} }
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return -EBUSY; return -EBUSY;
else if (!strcasecmp(argv[0], "resync")) else if (!strcasecmp(argv[0], "resync"))
; /* MD_RECOVERY_NEEDED set below */ ; /* MD_RECOVERY_NEEDED set below */
......
...@@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, ...@@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
char dummy; char dummy;
int ret; int ret;
if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 ||
offset != (sector_t)offset) {
ti->error = "Invalid offset"; ti->error = "Invalid offset";
return -EINVAL; return -EINVAL;
} }
......
...@@ -128,7 +128,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) ...@@ -128,7 +128,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
* the md may be freed in dm_put() at the end of this function. * the md may be freed in dm_put() at the end of this function.
* Or do dm_get() before calling this function and dm_put() later. * Or do dm_get() before calling this function and dm_put() later.
*/ */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue) static void rq_completed(struct mapped_device *md)
{ {
/* nudge anyone waiting on suspend queue */ /* nudge anyone waiting on suspend queue */
if (unlikely(waitqueue_active(&md->wait))) if (unlikely(waitqueue_active(&md->wait)))
...@@ -147,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) ...@@ -147,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
*/ */
static void dm_end_request(struct request *clone, blk_status_t error) static void dm_end_request(struct request *clone, blk_status_t error)
{ {
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data; struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md; struct mapped_device *md = tio->md;
struct request *rq = tio->orig; struct request *rq = tio->orig;
...@@ -157,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error) ...@@ -157,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error)
rq_end_stats(md, rq); rq_end_stats(md, rq);
blk_mq_end_request(rq, error); blk_mq_end_request(rq, error);
rq_completed(md, rw, true); rq_completed(md);
} }
static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs) static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
...@@ -181,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ ...@@ -181,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
{ {
struct mapped_device *md = tio->md; struct mapped_device *md = tio->md;
struct request *rq = tio->orig; struct request *rq = tio->orig;
int rw = rq_data_dir(rq);
unsigned long delay_ms = delay_requeue ? 100 : 0; unsigned long delay_ms = delay_requeue ? 100 : 0;
rq_end_stats(md, rq); rq_end_stats(md, rq);
...@@ -191,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ ...@@ -191,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
} }
dm_mq_delay_requeue_request(rq, delay_ms); dm_mq_delay_requeue_request(rq, delay_ms);
rq_completed(md, rw, false); rq_completed(md);
} }
static void dm_done(struct request *clone, blk_status_t error, bool mapped) static void dm_done(struct request *clone, blk_status_t error, bool mapped)
...@@ -246,15 +244,13 @@ static void dm_softirq_done(struct request *rq) ...@@ -246,15 +244,13 @@ static void dm_softirq_done(struct request *rq)
bool mapped = true; bool mapped = true;
struct dm_rq_target_io *tio = tio_from_request(rq); struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone; struct request *clone = tio->clone;
int rw;
if (!clone) { if (!clone) {
struct mapped_device *md = tio->md; struct mapped_device *md = tio->md;
rq_end_stats(md, rq); rq_end_stats(md, rq);
rw = rq_data_dir(rq);
blk_mq_end_request(rq, tio->error); blk_mq_end_request(rq, tio->error);
rq_completed(md, rw, false); rq_completed(md);
return; return;
} }
...@@ -376,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio) ...@@ -376,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio)
blk_status_t ret; blk_status_t ret;
r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
check_again:
switch (r) { switch (r) {
case DM_MAPIO_SUBMITTED: case DM_MAPIO_SUBMITTED:
/* The target has taken the I/O to submit by itself later */ /* The target has taken the I/O to submit by itself later */
...@@ -396,8 +391,7 @@ static int map_request(struct dm_rq_target_io *tio) ...@@ -396,8 +391,7 @@ static int map_request(struct dm_rq_target_io *tio)
blk_rq_unprep_clone(clone); blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone); tio->ti->type->release_clone_rq(clone);
tio->clone = NULL; tio->clone = NULL;
r = DM_MAPIO_REQUEUE; return DM_MAPIO_REQUEUE;
goto check_again;
} }
break; break;
case DM_MAPIO_REQUEUE: case DM_MAPIO_REQUEUE:
...@@ -507,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -507,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (map_request(tio) == DM_MAPIO_REQUEUE) { if (map_request(tio) == DM_MAPIO_REQUEUE) {
/* Undo dm_start_request() before requeuing */ /* Undo dm_start_request() before requeuing */
rq_end_stats(md, rq); rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false); rq_completed(md);
return BLK_STS_RESOURCE; return BLK_STS_RESOURCE;
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/dm-kcopyd.h> #include <linux/dm-kcopyd.h>
#include <linux/semaphore.h>
#include "dm.h" #include "dm.h"
...@@ -105,6 +106,9 @@ struct dm_snapshot { ...@@ -105,6 +106,9 @@ struct dm_snapshot {
/* The on disk metadata handler */ /* The on disk metadata handler */
struct dm_exception_store *store; struct dm_exception_store *store;
/* Maximum number of in-flight COW jobs. */
struct semaphore cow_count;
struct dm_kcopyd_client *kcopyd_client; struct dm_kcopyd_client *kcopyd_client;
/* Wait for events based on state_bits */ /* Wait for events based on state_bits */
...@@ -145,6 +149,19 @@ struct dm_snapshot { ...@@ -145,6 +149,19 @@ struct dm_snapshot {
#define RUNNING_MERGE 0 #define RUNNING_MERGE 0
#define SHUTDOWN_MERGE 1 #define SHUTDOWN_MERGE 1
/*
* Maximum number of chunks being copied on write.
*
* The value was decided experimentally as a trade-off between memory
* consumption, stalling the kernel's workqueues and maintaining a high enough
* throughput.
*/
#define DEFAULT_COW_THRESHOLD 2048
static int cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
"A percentage of time allocated for copy on write"); "A percentage of time allocated for copy on write");
...@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables; goto bad_hash_tables;
} }
sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) { if (IS_ERR(s->kcopyd_client)) {
r = PTR_ERR(s->kcopyd_client); r = PTR_ERR(s->kcopyd_client);
...@@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) ...@@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
rb_link_node(&pe->out_of_order_node, parent, p); rb_link_node(&pe->out_of_order_node, parent, p);
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
} }
up(&s->cow_count);
} }
/* /*
...@@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) ...@@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count; dest.count = src.count;
/* Hand over to kcopyd */ /* Hand over to kcopyd */
down(&s->cow_count);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
} }
...@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, ...@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio; pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io; pe->full_bio_end_io = bio->bi_end_io;
down(&s->cow_count);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe); copy_callback, pe);
......
...@@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
*/ */
if (blk_queue_is_zoned(q)) if (blk_queue_is_zoned(q))
blk_revalidate_disk_zones(t->md->disk); blk_revalidate_disk_zones(t->md->disk);
/* Allow reads to exceed readahead limits */
q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
} }
unsigned int dm_table_get_num_targets(struct dm_table *t) unsigned int dm_table_get_num_targets(struct dm_table *t)
......
...@@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto err; goto err;
} }
if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) { if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
ti->error = "Invalid striped device offset"; ti->error = "Invalid striped device offset";
goto err; goto err;
} }
......
...@@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
v->tfm = NULL; v->tfm = NULL;
goto bad; goto bad;
} }
/*
* dm-verity performance can vary greatly depending on which hash
* algorithm implementation is used. Help people debug performance
* problems by logging the ->cra_driver_name.
*/
DMINFO("%s using implementation \"%s\"", v->alg_name,
crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
v->digest_size = crypto_ahash_digestsize(v->tfm); v->digest_size = crypto_ahash_digestsize(v->tfm);
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
ti->error = "Digest size too big"; ti->error = "Digest size too big";
......
...@@ -2061,7 +2061,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -2061,7 +2061,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (IS_ERR(wc->flush_thread)) { if (IS_ERR(wc->flush_thread)) {
r = PTR_ERR(wc->flush_thread); r = PTR_ERR(wc->flush_thread);
wc->flush_thread = NULL; wc->flush_thread = NULL;
ti->error = "Couldn't spawn endio thread"; ti->error = "Couldn't spawn flush thread";
goto bad; goto bad;
} }
wake_up_process(wc->flush_thread); wake_up_process(wc->flush_thread);
......
...@@ -1486,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti) ...@@ -1486,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti)
} }
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
get_num_bios_fn get_num_bios, unsigned num_bios, bool is_split_required)
is_split_required_fn is_split_required)
{ {
unsigned len; unsigned len;
unsigned num_bios;
/* /*
* Even though the device advertised support for this type of * Even though the device advertised support for this type of
...@@ -1498,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * ...@@ -1498,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
* reconfiguration might also have changed that since the * reconfiguration might also have changed that since the
* check was performed. * check was performed.
*/ */
num_bios = get_num_bios ? get_num_bios(ti) : 0;
if (!num_bios) if (!num_bios)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (is_split_required && !is_split_required(ti)) if (!is_split_required)
len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
else else
len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
...@@ -1517,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target * ...@@ -1517,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
static int __send_discard(struct clone_info *ci, struct dm_target *ti) static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{ {
return __send_changing_extent_only(ci, ti, get_num_discard_bios, return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
is_split_required_for_discard); is_split_required_for_discard(ti));
} }
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{ {
return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL); return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
} }
static int __send_write_same(struct clone_info *ci, struct dm_target *ti) static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{ {
return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL); return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
} }
static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{ {
return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL); return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
} }
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
...@@ -1716,10 +1713,7 @@ static blk_qc_t __process_bio(struct mapped_device *md, ...@@ -1716,10 +1713,7 @@ static blk_qc_t __process_bio(struct mapped_device *md,
return ret; return ret;
} }
typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *); static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
process_bio_fn process_bio)
{ {
struct mapped_device *md = q->queuedata; struct mapped_device *md = q->queuedata;
blk_qc_t ret = BLK_QC_T_NONE; blk_qc_t ret = BLK_QC_T_NONE;
...@@ -1739,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio, ...@@ -1739,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
return ret; return ret;
} }
ret = process_bio(md, map, bio); if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
ret = __process_bio(md, map, bio);
else
ret = __split_and_process_bio(md, map, bio);
dm_put_live_table(md, srcu_idx); dm_put_live_table(md, srcu_idx);
return ret; return ret;
} }
/*
* The request function that remaps the bio to one target and
* splits off any remainder.
*/
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
return __dm_make_request(q, bio, __split_and_process_bio);
}
static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio)
{
return __dm_make_request(q, bio, __process_bio);
}
static int dm_any_congested(void *congested_data, int bdi_bits) static int dm_any_congested(void *congested_data, int bdi_bits)
{ {
int r = bdi_bits; int r = bdi_bits;
...@@ -2246,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) ...@@ -2246,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
break; break;
case DM_TYPE_BIO_BASED: case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED: case DM_TYPE_DAX_BIO_BASED:
dm_init_normal_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request);
break;
case DM_TYPE_NVME_BIO_BASED: case DM_TYPE_NVME_BIO_BASED:
dm_init_normal_md_queue(md); dm_init_normal_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request_nvme); blk_queue_make_request(md->queue, dm_make_request);
break; break;
case DM_TYPE_NONE: case DM_TYPE_NONE:
WARN_ON_ONCE(true); WARN_ON_ONCE(true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment