Commit 8023e144 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe

block: move the poll flag to queue_limits

Move the poll flag into the queue_limits feature field so that it can
be set atomically with the queue frozen.

Stacking drivers are simplified in that they now can simply set the
flag, and blk_stack_limits will clear it when the features is not
supported by any of the underlying devices.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDamien Le Moal <dlemoal@kernel.org>
Reviewed-by: default avatarHannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-22-hch@lst.deSigned-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent f467fee4
...@@ -791,7 +791,7 @@ void submit_bio_noacct(struct bio *bio) ...@@ -791,7 +791,7 @@ void submit_bio_noacct(struct bio *bio)
} }
} }
if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) if (!(q->limits.features & BLK_FEAT_POLL))
bio_clear_polled(bio); bio_clear_polled(bio);
switch (bio_op(bio)) { switch (bio_op(bio)) {
...@@ -915,8 +915,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) ...@@ -915,8 +915,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0; return 0;
q = bdev_get_queue(bdev); q = bdev_get_queue(bdev);
if (cookie == BLK_QC_T_NONE || if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0; return 0;
blk_flush_plug(current->plug, false); blk_flush_plug(current->plug, false);
......
...@@ -87,7 +87,6 @@ static const char *const blk_queue_flag_name[] = { ...@@ -87,7 +87,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(NOXMERGES),
QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(SAME_FORCE),
QUEUE_FLAG_NAME(INIT_DONE), QUEUE_FLAG_NAME(INIT_DONE),
QUEUE_FLAG_NAME(POLL),
QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(REGISTERED), QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(QUIESCED), QUEUE_FLAG_NAME(QUIESCED),
......
...@@ -4109,6 +4109,12 @@ void blk_mq_release(struct request_queue *q) ...@@ -4109,6 +4109,12 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q); blk_mq_sysfs_deinit(q);
} }
static bool blk_mq_can_poll(struct blk_mq_tag_set *set)
{
return set->nr_maps > HCTX_TYPE_POLL &&
set->map[HCTX_TYPE_POLL].nr_queues;
}
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
struct queue_limits *lim, void *queuedata) struct queue_limits *lim, void *queuedata)
{ {
...@@ -4119,6 +4125,8 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, ...@@ -4119,6 +4125,8 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
if (!lim) if (!lim)
lim = &default_lim; lim = &default_lim;
lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
if (blk_mq_can_poll(set))
lim->features |= BLK_FEAT_POLL;
q = blk_alloc_queue(lim, set->numa_node); q = blk_alloc_queue(lim, set->numa_node);
if (IS_ERR(q)) if (IS_ERR(q))
...@@ -4273,17 +4281,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, ...@@ -4273,17 +4281,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_lock);
} }
static void blk_mq_update_poll_flag(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
if (set->nr_maps > HCTX_TYPE_POLL &&
set->map[HCTX_TYPE_POLL].nr_queues)
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
else
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
}
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q) struct request_queue *q)
{ {
...@@ -4311,7 +4308,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4311,7 +4308,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
q->tag_set = set; q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
blk_mq_update_poll_flag(q);
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
INIT_LIST_HEAD(&q->flush_list); INIT_LIST_HEAD(&q->flush_list);
...@@ -4798,8 +4794,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, ...@@ -4798,8 +4794,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback: fallback:
blk_mq_update_queue_map(set); blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) { list_for_each_entry(q, &set->tag_list, tag_set_list) {
struct queue_limits lim;
blk_mq_realloc_hw_ctxs(set, q); blk_mq_realloc_hw_ctxs(set, q);
blk_mq_update_poll_flag(q);
if (q->nr_hw_queues != set->nr_hw_queues) { if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues; int i = prev_nr_hw_queues;
...@@ -4811,6 +4809,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, ...@@ -4811,6 +4809,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
set->nr_hw_queues = prev_nr_hw_queues; set->nr_hw_queues = prev_nr_hw_queues;
goto fallback; goto fallback;
} }
lim = queue_limits_start_update(q);
if (blk_mq_can_poll(set))
lim.features |= BLK_FEAT_POLL;
else
lim.features &= ~BLK_FEAT_POLL;
if (queue_limits_commit_update(q, &lim) < 0)
pr_warn("updating the poll flag failed\n");
blk_mq_map_swqueue(q); blk_mq_map_swqueue(q);
} }
......
...@@ -460,13 +460,15 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ...@@ -460,13 +460,15 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->features |= (b->features & BLK_FEAT_INHERIT_MASK); t->features |= (b->features & BLK_FEAT_INHERIT_MASK);
/* /*
* BLK_FEAT_NOWAIT needs to be supported both by the stacking driver * BLK_FEAT_NOWAIT and BLK_FEAT_POLL need to be supported both by the
* and all underlying devices. The stacking driver sets the flag * stacking driver and all underlying devices. The stacking driver sets
* before stacking the limits, and this will clear the flag if any * the flags before stacking the limits, and this will clear the flags
* of the underlying devices does not support it. * if any of the underlying devices does not support it.
*/ */
if (!(b->features & BLK_FEAT_NOWAIT)) if (!(b->features & BLK_FEAT_NOWAIT))
t->features &= ~BLK_FEAT_NOWAIT; t->features &= ~BLK_FEAT_NOWAIT;
if (!(b->features & BLK_FEAT_POLL))
t->features &= ~BLK_FEAT_POLL;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_user_sectors = min_not_zero(t->max_user_sectors, t->max_user_sectors = min_not_zero(t->max_user_sectors,
......
...@@ -394,13 +394,13 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, ...@@ -394,13 +394,13 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
static ssize_t queue_poll_show(struct request_queue *q, char *page) static ssize_t queue_poll_show(struct request_queue *q, char *page)
{ {
return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page); return queue_var_show(q->limits.features & BLK_FEAT_POLL, page);
} }
static ssize_t queue_poll_store(struct request_queue *q, const char *page, static ssize_t queue_poll_store(struct request_queue *q, const char *page,
size_t count) size_t count)
{ {
if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) if (!(q->limits.features & BLK_FEAT_POLL))
return -EINVAL; return -EINVAL;
pr_info_ratelimited("writes to the poll attribute are ignored.\n"); pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n"); pr_info_ratelimited("please use driver specific parameters instead.\n");
......
...@@ -582,7 +582,7 @@ int dm_split_args(int *argc, char ***argvp, char *input) ...@@ -582,7 +582,7 @@ int dm_split_args(int *argc, char ***argvp, char *input)
static void dm_set_stacking_limits(struct queue_limits *limits) static void dm_set_stacking_limits(struct queue_limits *limits)
{ {
blk_set_stacking_limits(limits); blk_set_stacking_limits(limits);
limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
} }
/* /*
...@@ -1024,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t) ...@@ -1024,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t)
return __table_type_request_based(dm_table_get_type(t)); return __table_type_request_based(dm_table_get_type(t));
} }
static bool dm_table_supports_poll(struct dm_table *t);
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{ {
enum dm_queue_mode type = dm_table_get_type(t); enum dm_queue_mode type = dm_table_get_type(t);
unsigned int per_io_data_size = 0, front_pad, io_front_pad; unsigned int per_io_data_size = 0, front_pad, io_front_pad;
unsigned int min_pool_size = 0, pool_size; unsigned int min_pool_size = 0, pool_size;
struct dm_md_mempools *pools; struct dm_md_mempools *pools;
unsigned int bioset_flags = 0;
if (unlikely(type == DM_TYPE_NONE)) { if (unlikely(type == DM_TYPE_NONE)) {
DMERR("no table type is set, can't allocate mempools"); DMERR("no table type is set, can't allocate mempools");
...@@ -1048,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * ...@@ -1048,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
goto init_bs; goto init_bs;
} }
if (md->queue->limits.features & BLK_FEAT_POLL)
bioset_flags |= BIOSET_PERCPU_CACHE;
for (unsigned int i = 0; i < t->num_targets; i++) { for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i); struct dm_target *ti = dm_table_get_target(t, i);
...@@ -1060,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * ...@@ -1060,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
io_front_pad = roundup(per_io_data_size, io_front_pad = roundup(per_io_data_size,
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET; __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
if (bioset_init(&pools->io_bs, pool_size, io_front_pad, if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
goto out_free_pools; goto out_free_pools;
if (t->integrity_supported && if (t->integrity_supported &&
bioset_integrity_create(&pools->io_bs, pool_size)) bioset_integrity_create(&pools->io_bs, pool_size))
...@@ -1404,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) ...@@ -1404,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
return &t->targets[(KEYS_PER_NODE * n) + k]; return &t->targets[(KEYS_PER_NODE * n) + k];
} }
static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags);
}
/* /*
* type->iterate_devices() should be called when the sanity check needs to * type->iterate_devices() should be called when the sanity check needs to
* iterate and check all underlying data devices. iterate_devices() will * iterate and check all underlying data devices. iterate_devices() will
...@@ -1459,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev, ...@@ -1459,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
return 0; return 0;
} }
static bool dm_table_supports_poll(struct dm_table *t)
{
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
return false;
}
return true;
}
/* /*
* Check whether a table has no data devices attached using each * Check whether a table has no data devices attached using each
* target's iterate_devices method. * target's iterate_devices method.
...@@ -1817,6 +1797,13 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1817,6 +1797,13 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_nowait(t)) if (!dm_table_supports_nowait(t))
limits->features &= ~BLK_FEAT_NOWAIT; limits->features &= ~BLK_FEAT_NOWAIT;
/*
* The current polling impementation does not support request based
* stacking.
*/
if (!__table_type_bio_based(t->type))
limits->features &= ~BLK_FEAT_POLL;
if (!dm_table_supports_discards(t)) { if (!dm_table_supports_discards(t)) {
limits->max_hw_discard_sectors = 0; limits->max_hw_discard_sectors = 0;
limits->discard_granularity = 0; limits->discard_granularity = 0;
...@@ -1858,21 +1845,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ...@@ -1858,21 +1845,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
return r; return r;
dm_update_crypto_profile(q, t); dm_update_crypto_profile(q, t);
/*
* Check for request-based device is left to
* dm_mq_init_request_queue()->blk_mq_init_allocated_queue().
*
* For bio-based device, only set QUEUE_FLAG_POLL when all
* underlying devices supporting polling.
*/
if (__table_type_bio_based(t->type)) {
if (dm_table_supports_poll(t))
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
else
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
}
return 0; return 0;
} }
......
...@@ -538,7 +538,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -538,7 +538,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
blk_set_stacking_limits(&lim); blk_set_stacking_limits(&lim);
lim.dma_alignment = 3; lim.dma_alignment = 3;
lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
if (head->ids.csi != NVME_CSI_ZNS) if (head->ids.csi != NVME_CSI_ZNS)
lim.max_zone_append_sectors = 0; lim.max_zone_append_sectors = 0;
...@@ -549,16 +549,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -549,16 +549,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
head->disk->private_data = head; head->disk->private_data = head;
sprintf(head->disk->disk_name, "nvme%dn%d", sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance); ctrl->subsys->instance, head->instance);
/*
* This assumes all controllers that refer to a namespace either
* support poll queues or not. That is not a strict guarantee,
* but if the assumption is wrong the effect is only suboptimal
* performance but not correctness problem.
*/
if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL &&
ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
return 0; return 0;
} }
......
...@@ -310,6 +310,9 @@ enum { ...@@ -310,6 +310,9 @@ enum {
/* supports DAX */ /* supports DAX */
BLK_FEAT_DAX = (1u << 8), BLK_FEAT_DAX = (1u << 8),
/* supports I/O polling */
BLK_FEAT_POLL = (1u << 9),
}; };
/* /*
...@@ -577,7 +580,6 @@ struct request_queue { ...@@ -577,7 +580,6 @@ struct request_queue {
#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */
#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */
#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment