Commit 7e284070 authored by Linus Torvalds

Merge tag 'for-5.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - Enable DM core bioset's per-cpu bio cache if QUEUE_FLAG_POLL is set.
   This change improves DM's hipri bio polling (REQ_POLLED) performance
   by 7-20% depending on the system.

 - Update DM core to use jump_labels to further reduce the cost of
   unlikely branches for zoned block devices, dm-stats and swap_bios
   throttling (see the static-branch sketch after this list).

 - Various DM core changes to reduce bio-based DM overhead and simplify
   IO accounting.

 - Fundamental DM core improvements to dm_io reference counting and
   the elimination of bio_split()+bio_chain() usage -- instead DM's
   bio-based IO accounting is updated to record that a split occurred.

 - Improve DM core's abnormal bio processing to do less work.

 - Improve DM core's hipri polling support to use a single list rather
   than an hlist.

 - Update DM core to pass NULL bdev to bio_alloc_clone() so that
   initialization that isn't useful for DM can be elided.

 - Add cond_resched to DM stats' various loops that loop over all
   entries.

 - Fix incorrect error code return from DM integrity's constructor.

 - Make DM crypt's printing of the key constant-time.

 - Update bio-based DM multipath to provide high-resolution timer to the
   Historical Service Time (HST) path selector.
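
A quick illustration of the jump_labels item above, since the same pattern recurs in the diffs below: a static key compiles the feature check down to a NOP in the hot path and only patches in a jump when the feature is first configured, so tables that never use dm-stats, swap_bios throttling or zoned emulation pay nothing per bio. A minimal sketch of the pattern (the example_* names are hypothetical, not from this pull):

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(example_feature_enabled);

/* Hot path: emitted as a NOP until the key is flipped, so the
 * disabled case costs no conditional branch per bio. */
static void example_hot_path(void)
{
	if (static_branch_unlikely(&example_feature_enabled)) {
		/* rarely-configured feature work goes here */
	}
}

/* Slow path: flip the key once, when the feature is first used.
 * This is the static_key_enabled()/static_branch_enable() pairing
 * visible in the dm-table.c and dm-stats.c hunks below. */
static void example_configure(void)
{
	if (!static_key_enabled(&example_feature_enabled.key))
		static_branch_enable(&example_feature_enabled);
}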

* tag 'for-5.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (26 commits)
  dm: pass NULL bdev to bio_alloc_clone
  dm cache metadata: remove unnecessary variable in __dump_mapping
  dm mpath: provide high-resolution timer to HST for bio-based
  dm crypt: make printing of the key constant-time
  dm integrity: fix error code in dm_integrity_ctr()
  dm stats: add cond_resched when looping over entries
  dm: improve abnormal bio processing
  dm: simplify bio-based IO accounting further
  dm: put all polled dm_io instances into a single list
  dm: improve dm_io reference counting
  dm: don't grab target io reference in dm_zone_map_bio
  dm: improve bio splitting and associated IO accounting
  dm: switch to bdev based IO accounting interfaces
  dm: pass dm_io instance to dm_io_acct directly
  dm: don't pass bio to __dm_start_io_acct and dm_end_io_acct
  dm: use bio_sectors in dm_accept_partial_bio
  dm: simplify basic targets
  dm: conditionally enable branching for less used features
  dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio
  dm: move hot dm_io members to same cacheline as dm_target_io
  ...
parents 780d8ce7 ca522482
@@ -1509,7 +1509,6 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
 static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
 {
-	int r = 0;
 	__le64 value;
 	dm_oblock_t oblock;
 	unsigned flags;
@@ -1517,7 +1516,7 @@ static int __dump_mapping(void *context, uint64_t cblock, void *leaf)
 	memcpy(&value, leaf, sizeof(value));
 	unpack_value(value, &oblock, &flags);

-	return r;
+	return 0;
 }

 static int __dump_mappings(struct dm_cache_metadata *cmd)
...
@@ -13,6 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-crypto-profile.h>
+#include <linux/jump_label.h>

 #include <trace/events/block.h>
@@ -154,6 +155,10 @@ static inline struct dm_stats *dm_get_stats(struct mapped_device *md)
 	return &md->stats;
 }

+DECLARE_STATIC_KEY_FALSE(stats_enabled);
+DECLARE_STATIC_KEY_FALSE(swap_bios_enabled);
+DECLARE_STATIC_KEY_FALSE(zoned_enabled);
+
 static inline bool dm_emulate_zone_append(struct mapped_device *md)
 {
 	if (blk_queue_is_zoned(md->queue))
@@ -237,6 +242,12 @@ static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
 	tio->flags |= (1U << bit);
 }

+static inline bool dm_tio_is_normal(struct dm_target_io *tio)
+{
+	return (dm_tio_flagged(tio, DM_TIO_INSIDE_DM_IO) &&
+		!dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
+}
+
 /*
  * One of these is allocated per original bio.
  * It contains the first clone used for that original.
@@ -245,16 +256,20 @@ static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
 struct dm_io {
 	unsigned short magic;
 	blk_short_t flags;
-	atomic_t io_count;
-	struct mapped_device *md;
-	struct bio *orig_bio;
-	blk_status_t status;
 	spinlock_t lock;
 	unsigned long start_time;
 	void *data;
-	struct hlist_node node;
-	struct task_struct *map_task;
+	struct dm_io *next;
 	struct dm_stats_aux stats_aux;
+	blk_status_t status;
+	atomic_t io_count;
+	struct mapped_device *md;
+
+	/* The three fields represent mapped part of original bio */
+	struct bio *orig_bio;
+	unsigned int sector_offset; /* offset to end of orig_bio */
+	unsigned int sectors;
+
 	/* last member of dm_target_io is 'struct bio' */
 	struct dm_target_io tio;
 };
@@ -263,8 +278,8 @@ struct dm_io {
  * dm_io flags
  */
 enum {
-	DM_IO_START_ACCT,
-	DM_IO_ACCOUNTED
+	DM_IO_ACCOUNTED,
+	DM_IO_WAS_SPLIT
 };

 static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
@@ -277,13 +292,6 @@ static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
 	io->flags |= (1U << bit);
 }

-static inline void dm_io_inc_pending(struct dm_io *io)
-{
-	atomic_inc(&io->io_count);
-}
-
-void dm_io_dec_pending(struct dm_io *io, blk_status_t error);
-
 static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
 {
 	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
...
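
The dm_io reshuffle above ("dm: move hot dm_io members to same cacheline as dm_target_io") groups the fields written on the fast path immediately before the embedded struct dm_target_io so they share its cacheline. A userspace sketch of how such a layout property can be asserted at build time; the struct and member names here are illustrative, not the kernel's:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct example_io {
	unsigned short magic;
	unsigned short flags;
	unsigned long start_time;
	void *data;
	struct example_io *next;
	/* "hot" members deliberately kept adjacent */
	int status;
	int io_count;
};

int main(void)
{
	/* two members share a 64-byte cacheline iff their slot indices match */
	static_assert(offsetof(struct example_io, status) / 64 ==
		      offsetof(struct example_io, io_count) / 64,
		      "hot members split across cachelines");
	printf("status @%zu, io_count @%zu\n",
	       offsetof(struct example_io, status),
	       offsetof(struct example_io, io_count));
	return 0;
}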
@@ -3439,6 +3439,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }

+static char hex2asc(unsigned char c)
+{
+	return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27);
+}
+
 static void crypt_status(struct dm_target *ti, status_type_t type,
 			 unsigned status_flags, char *result, unsigned maxlen)
 {
@@ -3457,9 +3462,12 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 	if (cc->key_size > 0) {
 		if (cc->key_string)
 			DMEMIT(":%u:%s", cc->key_size, cc->key_string);
-		else
-			for (i = 0; i < cc->key_size; i++)
-				DMEMIT("%02x", cc->key[i]);
+		else {
+			for (i = 0; i < cc->key_size; i++) {
+				DMEMIT("%c%c", hex2asc(cc->key[i] >> 4),
+				       hex2asc(cc->key[i] & 0xf));
+			}
+		}
 	} else
 		DMEMIT("-");
...
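
The hex2asc() helper above is what makes the key printing constant-time: unlike a "%02x" format or a lookup table, it has no key-dependent branch or memory access. For a nibble c in 0..9, (9 - c) >> 4 is zero and the result is simply '0' + c; for c in 10..15, 9 - c wraps around as unsigned, the shift leaves ones in the masked bits, and & 0x27 reduces that to 39 == 'a' - '0' - 10, so 10..15 land on 'a'..'f'. A standalone check (the helper is verbatim from the patch; the main() harness is an assumption of mine):

#include <stdio.h>

static char hex2asc(unsigned char c)
{
	/* 0..9   -> '0'..'9' : the shifted term is 0
	 * 10..15 -> 'a'..'f' : the shifted term is 0x27 (39) */
	return c + '0' + ((unsigned)(9 - c) >> 4 & 0x27);
}

int main(void)
{
	for (unsigned char c = 0; c < 16; c++)
		putchar(hex2asc(c));
	putchar('\n');	/* prints 0123456789abcdef */
	return 0;
}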
@@ -296,7 +296,6 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
 	}
 	delayed->class = c;
 	bio_set_dev(bio, c->dev->bdev);
-	if (bio_sectors(bio))
-		bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
+	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

 	return delay_bio(dc, c, bio);
...
@@ -280,9 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
 	struct flakey_c *fc = ti->private;

 	bio_set_dev(bio, fc->dev->bdev);
-	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
-		bio->bi_iter.bi_sector =
-			flakey_map_sector(ti, bio->bi_iter.bi_sector);
+	bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector);
 }

 static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
...
@@ -4494,8 +4494,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	}

 	if (should_write_sb) {
-		int r;
-
 		init_journal(ic, 0, ic->journal_sections, 0);
 		r = dm_integrity_failed(ic);
 		if (unlikely(r)) {
...
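
The dm-integrity hunk above is the entire fix: the inner int r; shadowed the function-scope r, so an error reported by dm_integrity_failed() landed in the inner variable and the constructor could fall through to return the stale outer value (0, success). A reduced illustration of the bug class, not the DM code:

#include <stdio.h>

static int do_work(void)
{
	return -5;	/* pretend failure */
}

static int ctr(void)
{
	int r = 0;
	{
		int r;	/* shadows the outer r */

		r = do_work();
		if (r)
			printf("inner scope sees error %d\n", r);
	}
	return r;	/* outer r is still 0: the error is lost */
}

int main(void)
{
	printf("ctr() returned %d\n", ctr());	/* prints 0, not -5 */
	return 0;
}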
@@ -84,19 +84,12 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector)
 	return lc->start + dm_target_offset(ti, bi_sector);
 }

-static void linear_map_bio(struct dm_target *ti, struct bio *bio)
+static int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	struct linear_c *lc = ti->private;

 	bio_set_dev(bio, lc->dev->bdev);
-	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
-		bio->bi_iter.bi_sector =
-			linear_map_sector(ti, bio->bi_iter.bi_sector);
-}
-
-static int linear_map(struct dm_target *ti, struct bio *bio)
-{
-	linear_map_bio(ti, bio);
+	bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector);

 	return DM_MAPIO_REMAPPED;
 }
...
@@ -105,6 +105,7 @@ struct multipath {
 struct dm_mpath_io {
 	struct pgpath *pgpath;
 	size_t nr_bytes;
+	u64 start_time_ns;
 };

 typedef int (*action_fn) (struct pgpath *pgpath);
@@ -295,6 +296,7 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp
 	mpio->nr_bytes = bio->bi_iter.bi_size;
 	mpio->pgpath = NULL;
+	mpio->start_time_ns = 0;
 	*mpio_p = mpio;

 	dm_bio_record(bio_details, bio);
@@ -647,6 +649,9 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
 	mpio->pgpath = pgpath;

+	if (dm_ps_use_hr_timer(pgpath->pg->ps.type))
+		mpio->start_time_ns = ktime_get_ns();
+
 	bio->bi_status = 0;
 	bio_set_dev(bio, pgpath->path.dev->bdev);
 	bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
@@ -1713,7 +1718,8 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
-					 dm_start_time_ns_from_clone(clone));
+					 (mpio->start_time_ns ?:
+					  dm_start_time_ns_from_clone(clone)));
 	}

 	return r;
...
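
In the multipath_end_io_bio() hunk above, mpio->start_time_ns is nonzero only when the active path selector set DM_PS_USE_HR_TIMER; the GNU "a ?: b" extension (evaluate a once, yield it if nonzero, else b) then falls back to the coarse, jiffies-derived dm_start_time_ns_from_clone() value for every other selector. A small sketch of that fallback idiom; names are hypothetical, and it needs GCC or Clang for the extension:

#include <stdint.h>
#include <stdio.h>

static uint64_t coarse_start_ns(void)
{
	return 2000000;	/* stand-in for a jiffies-derived timestamp */
}

int main(void)
{
	uint64_t hr_start_ns = 0;	/* stays 0 unless the selector opted in */
	uint64_t start_ns = hr_start_ns ?: coarse_start_ns();

	printf("using start time %llu ns\n", (unsigned long long)start_ns);
	return 0;
}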
@@ -26,11 +26,26 @@ struct path_selector {
 	void *context;
 };

+/*
+ * If a path selector uses this flag, a high resolution timer is used
+ * (via ktime_get_ns) to account for IO start time in BIO-based mpath.
+ * This improves performance of some path selectors (i.e. HST), in
+ * exchange for slightly higher overhead when submitting the BIO.
+ * The extra cost is usually offset by improved path selection for
+ * some benchmarks.
+ *
+ * This has no effect for request-based mpath, since it already uses a
+ * higher precision timer by default.
+ */
+#define DM_PS_USE_HR_TIMER		0x00000001
+#define dm_ps_use_hr_timer(type)	((type)->features & DM_PS_USE_HR_TIMER)
+
 /* Information about a path selector type */
 struct path_selector_type {
 	char *name;
 	struct module *module;

+	unsigned int features;
+
 	unsigned int table_args;
 	unsigned int info_args;
...
@@ -523,6 +523,7 @@ static int hst_end_io(struct path_selector *ps, struct dm_path *path,
 static struct path_selector_type hst_ps = {
 	.name		= "historical-service-time",
 	.module		= THIS_MODULE,
+	.features	= DM_PS_USE_HR_TIMER,
 	.table_args	= 1,
 	.info_args	= 3,
 	.create		= hst_create,
...
@@ -225,6 +225,7 @@ void dm_stats_cleanup(struct dm_stats *stats)
 				       atomic_read(&shared->in_flight[READ]),
 				       atomic_read(&shared->in_flight[WRITE]));
 			}
+			cond_resched();
 		}
 		dm_stat_free(&s->rcu_head);
 	}
@@ -330,6 +331,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 	for (ni = 0; ni < n_entries; ni++) {
 		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
 		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
+		cond_resched();
 	}

 	if (s->n_histogram_entries) {
@@ -342,6 +344,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 		for (ni = 0; ni < n_entries; ni++) {
 			s->stat_shared[ni].tmp.histogram = hi;
 			hi += s->n_histogram_entries + 1;
+			cond_resched();
 		}
 	}

@@ -362,6 +365,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 			for (ni = 0; ni < n_entries; ni++) {
 				p[ni].histogram = hi;
 				hi += s->n_histogram_entries + 1;
+				cond_resched();
 			}
 		}
 	}
@@ -396,6 +400,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 	dm_stats_recalc_precise_timestamps(stats);

+	if (!static_key_enabled(&stats_enabled.key))
+		static_branch_enable(&stats_enabled);
+
 	mutex_unlock(&stats->mutex);

 	resume_callback(md);
@@ -497,6 +504,7 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
 			}
 			DMEMIT("\n");
 		}
+		cond_resched();
 	}
 	mutex_unlock(&stats->mutex);
@@ -774,6 +782,7 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
 				local_irq_enable();
 			}
 		}
+		cond_resched();
 	}
 }
@@ -889,6 +898,8 @@ static int dm_stats_print(struct dm_stats *stats, int id,
 		if (unlikely(sz + 1 >= maxlen))
 			goto buffer_overflow;
+
+		cond_resched();
 	}

 	if (clear)
...
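
The cond_resched() calls sprinkled through dm-stats above all follow one pattern: the loop bounds (n_entries, the number of regions) are user-controlled and can be very large, and on non-preemptible kernels a long process-context loop would otherwise hog the CPU. Reduced to its shape (touch_entry() is a hypothetical stand-in for the per-entry work):

/* Long process-context loop with a voluntary preemption point.
 * cond_resched() may sleep, so it is only legal where sleeping is
 * allowed: no spinlocks held and IRQs enabled, which is why the
 * __dm_stat_clear() hunk places it after local_irq_enable(). */
for (ni = 0; ni < n_entries; ni++) {
	touch_entry(ni);
	cond_resched();
}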
@@ -719,6 +719,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
 		       dm_device_name(t->md), type);

+	if (tgt->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
+		static_branch_enable(&swap_bios_enabled);
+
 	return 0;

  bad:
@@ -1002,6 +1005,8 @@ bool dm_table_request_based(struct dm_table *t)
 	return __table_type_request_based(dm_table_get_type(t));
 }

+static int dm_table_supports_poll(struct dm_table *t);
+
 static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
 {
 	enum dm_queue_mode type = dm_table_get_type(t);
@@ -1009,21 +1014,24 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
 	unsigned min_pool_size = 0;
 	struct dm_target *ti;
 	unsigned i;
+	bool poll_supported = false;

 	if (unlikely(type == DM_TYPE_NONE)) {
 		DMWARN("no table type is set, can't allocate mempools");
 		return -EINVAL;
 	}

-	if (__table_type_bio_based(type))
+	if (__table_type_bio_based(type)) {
 		for (i = 0; i < t->num_targets; i++) {
 			ti = t->targets + i;
 			per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
 			min_pool_size = max(min_pool_size, ti->num_flush_bios);
 		}
+		poll_supported = !!dm_table_supports_poll(t);
+	}

-	t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported,
-					   per_io_data_size, min_pool_size);
+	t->mempools = dm_alloc_md_mempools(md, type, per_io_data_size, min_pool_size,
+					   t->integrity_supported, poll_supported);
 	if (!t->mempools)
 		return -ENOMEM;

@@ -2035,6 +2043,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 		r = dm_set_zones_restrictions(t, q);
 		if (r)
 			return r;
+		if (!static_key_enabled(&zoned_enabled.key))
+			static_branch_enable(&zoned_enabled);
 	}

 	dm_update_crypto_profile(q, t);
...
@@ -550,13 +550,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 		return DM_MAPIO_KILL;
 	}

-	/*
-	 * The target map function may issue and complete the IO quickly.
-	 * Take an extra reference on the IO to make sure it does disappear
-	 * until we run dm_zone_map_bio_end().
-	 */
-	dm_io_inc_pending(io);
-
 	/* Let the target do its work */
 	r = ti->type->map(ti, clone);
 	switch (r) {
@@ -587,9 +580,6 @@ int dm_zone_map_bio(struct dm_target_io *tio)
 		break;
 	}

-	/* Drop the extra reference on the IO */
-	dm_io_dec_pending(io, sts);
-
 	if (sts != BLK_STS_OK)
 		return DM_MAPIO_KILL;
...
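The dm-zone.c deletion above works because of the dm_io reference-counting rework earlier in this series: the guard reference existed so that a target ->map() that issues and completes the IO immediately could not free the dm_io before dm_zone_map_bio_end() ran, and the reference the submitter already holds now outlives ->map(), making the extra get/put pure overhead. The general guard pattern the removed code implemented, in plain C (illustrative only, not the DM implementation):

#include <stdatomic.h>
#include <stdlib.h>

struct io {
	atomic_int refcount;	/* starts at 1, owned by the submitter */
};

static void io_put(struct io *io)
{
	if (atomic_fetch_sub(&io->refcount, 1) == 1)
		free(io);	/* last reference gone */
}

/* map() may "complete" the io (drop a reference) before returning;
 * the extra get keeps io valid for the post-map inspection. */
static void map_with_guard(struct io *io, void (*map)(struct io *))
{
	atomic_fetch_add(&io->refcount, 1);
	map(io);
	/* io is still safe to inspect here */
	io_put(io);
}
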
@@ -221,8 +221,8 @@ void dm_kcopyd_exit(void);
  * Mempool operations
  */
 struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
-					    unsigned integrity, unsigned per_bio_data_size,
-					    unsigned min_pool_size);
+					    unsigned per_io_data_size, unsigned min_pool_size,
+					    bool integrity, bool poll);
 void dm_free_md_mempools(struct dm_md_mempools *pools);

 /*