Commit fea9f92f authored by John Garry's avatar John Garry Committed by Jens Axboe

blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags

Kashyap reports high CPU usage in blk_mq_queue_tag_busy_iter() and callees
using megaraid SAS RAID card since moving to shared tags [0].

Previously, when shared tags was shared sbitmap, this function was less
than optimum since we would iter through all tags for all hctx's,
yet only ever match upto tagset depth number of rqs.

Since the change to shared tags, things are even less efficient if we have
parallel callers of blk_mq_queue_tag_busy_iter(). This is because in
bt_iter() -> blk_mq_find_and_get_req() there would be more contention on
accessing each request ref and tags->lock since they are now shared among
all HW queues.

Optimise by having separate calls to bt_for_each() for when we're using
shared tags. In this case no longer pass a hctx, as it is no longer
relevant, and teach bt_iter() about this.

Ming suggested something along the lines of this change, apart from a
different implementation.

[0] https://lore.kernel.org/linux-block/e4e92abbe9d52bcba6b8cc6c91c442cc@mail.gmail.com/Signed-off-by: default avatarJohn Garry <john.garry@huawei.com>
Reviewed-by: default avatarHannes Reinecke <hare@suse.de>
Reviewed-by: default avatarMing Lei <ming.lei@redhat.com>
Reported-and-tested-by: default avatarKashyap Desai <kashyap.desai@broadcom.com>
Fixes: e155b0c2 ("blk-mq: Use shared tags for shared sbitmap support")
Link: https://lore.kernel.org/r/1638794990-137490-4-git-send-email-john.garry@huawei.comSigned-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent fc39f8d2
......@@ -215,6 +215,7 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
struct bt_iter_data {
struct blk_mq_hw_ctx *hctx;
struct request_queue *q;
busy_tag_iter_fn *fn;
void *data;
bool reserved;
......@@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
struct bt_iter_data *iter_data = data;
struct blk_mq_hw_ctx *hctx = iter_data->hctx;
struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = iter_data->q;
struct blk_mq_tag_set *set = q->tag_set;
bool reserved = iter_data->reserved;
struct blk_mq_tags *tags;
struct request *rq;
bool ret = true;
if (blk_mq_is_shared_tags(set->flags))
tags = set->shared_tags;
else
tags = hctx->tags;
if (!reserved)
bitnr += tags->nr_reserved_tags;
/*
......@@ -253,7 +261,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!rq)
return true;
if (rq->q == hctx->queue && rq->mq_hctx == hctx)
if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
ret = iter_data->fn(rq, iter_data->data, reserved);
blk_mq_put_rq_ref(rq);
return ret;
......@@ -262,6 +270,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
/**
* bt_for_each - iterate over the requests associated with a hardware queue
* @hctx: Hardware queue to examine.
* @q: Request queue to examine.
* @bt: sbitmap to examine. This is either the breserved_tags member
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each request
......@@ -273,14 +282,16 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
busy_tag_iter_fn *fn, void *data, bool reserved)
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
void *data, bool reserved)
{
struct bt_iter_data iter_data = {
.hctx = hctx,
.fn = fn,
.data = data,
.reserved = reserved,
.q = q,
};
sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
......@@ -460,9 +471,6 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv)
{
struct blk_mq_hw_ctx *hctx;
int i;
/*
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid
......@@ -471,19 +479,34 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
if (!percpu_ref_tryget(&q->q_usage_counter))
return;
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
struct blk_mq_tags *tags = q->tag_set->shared_tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;
if (tags->nr_reserved_tags)
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
bt_for_each(NULL, q, bresv, fn, priv, true);
bt_for_each(NULL, q, btags, fn, priv, false);
} else {
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;
/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;
if (tags->nr_reserved_tags)
bt_for_each(hctx, q, bresv, fn, priv, true);
bt_for_each(hctx, q, btags, fn, priv, false);
}
}
blk_queue_exit(q);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment