Commit f924ba70 authored by Jens Axboe
Browse files

Merge branch 'for-4.11/block' into for-4.11/rq-refactor

Signed-off-by: Jens Axboe <axboe@fb.com>
parents 7a308bb3 400f73b2
......@@ -8604,10 +8604,10 @@ S: Maintained
F: drivers/net/ethernet/netronome/
NETWORK BLOCK DEVICE (NBD)
M: Markus Pargmann <mpa@pengutronix.de>
M: Josef Bacik <jbacik@fb.com>
S: Maintained
L: linux-block@vger.kernel.org
L: nbd-general@lists.sourceforge.net
T: git git://git.pengutronix.de/git/mpa/linux-nbd.git
F: Documentation/blockdev/nbd.txt
F: drivers/block/nbd.c
F: include/uapi/linux/nbd.h
......
......@@ -147,6 +147,18 @@ config BLK_WBT_MQ
Multiqueue currently doesn't have support for IO scheduling,
enabling this option is recommended.
config BLK_DEBUG_FS
bool "Block layer debugging information in debugfs"
default y
depends on DEBUG_FS
---help---
Include block layer debugging information in debugfs. This information
is mostly useful for kernel developers, but it doesn't incur any cost
at runtime.
Unless you are building a kernel for a tiny system, you should
say Y here.
menu "Partition Types"
source "block/partitions/Kconfig"
......
......@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
default "cfq" if DEFAULT_CFQ
default "noop" if DEFAULT_NOOP
config MQ_IOSCHED_DEADLINE
tristate "MQ deadline I/O scheduler"
default y
---help---
MQ version of the deadline IO scheduler.
config MQ_IOSCHED_NONE
bool
default y
choice
prompt "Default single-queue blk-mq I/O scheduler"
default DEFAULT_SQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with a single queue.
config DEFAULT_SQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_SQ_NONE
bool "None"
endchoice
config DEFAULT_SQ_IOSCHED
string
default "mq-deadline" if DEFAULT_SQ_DEADLINE
default "none" if DEFAULT_SQ_NONE
choice
prompt "Default multi-queue blk-mq I/O scheduler"
default DEFAULT_MQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with multiple queues.
config DEFAULT_MQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_MQ_NONE
bool "None"
endchoice
config DEFAULT_MQ_IOSCHED
string
default "mq-deadline" if DEFAULT_MQ_DEADLINE
default "none" if DEFAULT_MQ_NONE
endmenu
endif
......@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
......@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
......@@ -25,3 +26,4 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
......@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol))
return 0;
blk_queue_bypass_start(q);
if (q->mq_ops)
blk_mq_freeze_queue(q);
else
blk_queue_bypass_start(q);
pd_prealloc:
if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
......@@ -1261,7 +1264,10 @@ int blkcg_activate_policy(struct request_queue *q,
spin_unlock_irq(q->queue_lock);
out_bypass_end:
blk_queue_bypass_end(q);
if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
......@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol))
return;
blk_queue_bypass_start(q);
if (q->mq_ops)
blk_mq_freeze_queue(q);
else
blk_queue_bypass_start(q);
spin_lock_irq(q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols);
......@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
}
spin_unlock_irq(q->queue_lock);
blk_queue_bypass_end(q);
if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
......
......@@ -39,6 +39,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
......@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->internal_tag = -1;
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
......@@ -1033,28 +1035,12 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
* Flush requests do not use the elevator so skip initialization.
* This allows a request to share the flush and elevator data.
*/
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
if (op_is_flush(bio->bi_opf))
return false;
return true;
}
/**
* rq_ioc - determine io_context for request allocation
* @bio: request being allocated is for this bio (can be %NULL)
*
* Determine io_context to use for request allocation for @bio. May return
* %NULL if %current->io_context doesn't exist.
*/
static struct io_context *rq_ioc(struct bio *bio)
{
#ifdef CONFIG_BLK_CGROUP
if (bio && bio->bi_ioc)
return bio->bi_ioc;
#endif
return current->io_context;
}
/**
* __get_request - get a free request
* @rl: request list to allocate from
......@@ -1655,7 +1641,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
if (op_is_flush(bio->bi_opf)) {
spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
......@@ -1894,7 +1880,7 @@ generic_make_request_checks(struct bio *bio)
* drivers without flush support don't have to worry
* about them.
*/
if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
if (op_is_flush(bio->bi_opf) &&
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!nr_sectors) {
......@@ -2143,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_insert_request(rq, false, true, false);
blk_mq_sched_insert_request(rq, false, true, false, false);
return 0;
}
......@@ -2159,7 +2145,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
if (op_is_flush(rq->cmd_flags))
where = ELEVATOR_INSERT_FLUSH;
add_acct_request(q, rq, where);
......@@ -3270,7 +3256,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* rq is already accounted, so use raw insert
*/
if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
if (op_is_flush(rq->cmd_flags))
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else
__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
......
......@@ -9,6 +9,7 @@
#include <linux/sched/sysctl.h>
#include "blk.h"
#include "blk-mq-sched.h"
/*
* for max sense size
......@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* be reused after dying flag is set
*/
if (q->mq_ops) {
blk_mq_insert_request(rq, at_head, true, false);
blk_mq_sched_insert_request(rq, at_head, true, false, false);
return;
}
......
......@@ -74,6 +74,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
/* FLUSH/FUA sequences */
enum {
......@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
* the comment in flush_end_io().
*/
spin_lock_irqsave(&fq->mq_flush_lock, flags);
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
blk_mq_run_hw_queue(hctx, true);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
blk_mq_run_hw_queue(hctx, true);
}
/**
......@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
if (q->mq_ops) {
blk_mq_insert_request(rq, false, true, false);
} else
if (q->mq_ops)
blk_mq_sched_insert_request(rq, false, true, false, false);
else
list_add_tail(&rq->queuelist, &q->queue_head);
return;
}
......
......@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED)
return;
if (et->ops.elevator_exit_icq_fn)
et->ops.elevator_exit_icq_fn(icq);
if (et->uses_mq && et->ops.mq.exit_icq)
et->ops.mq.exit_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
et->ops.sq.elevator_exit_icq_fn(icq);
icq->flags |= ICQ_EXITED;
}
......@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list);
if (et->ops.elevator_init_icq_fn)
et->ops.elevator_init_icq_fn(icq);
if (et->uses_mq && et->ops.mq.init_icq)
et->ops.mq.init_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
et->ops.sq.elevator_init_icq_fn(icq);
} else {
kmem_cache_free(et->icq_cache, icq);
icq = ioc_lookup_icq(ioc, q);
......
......@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{
struct elevator_queue *e = q->elevator;
if (e->type->ops.elevator_allow_rq_merge_fn)
if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0;
return attempt_merge(q, rq, next);
......
/*
* Copyright (C) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"
#include "blk-mq-tag.h"
/*
 * One entry in a table of debugfs files. The registration helpers in this
 * file pass @name, @mode and @fops straight to debugfs_create_file().
 *
 * The attribute tables below use positional initializers, so the field
 * order here must not change.
 */
struct blk_mq_debugfs_attr {
const char *name;
umode_t mode;
const struct file_operations *fops;
};
/*
 * The "block" debugfs directory created by blk_mq_debugfs_init(). Every
 * queue directory made by blk_mq_debugfs_register() is a child of it.
 */
static struct dentry *block_debugfs_root;
/*
 * Open @file as a seq_file driven by @ops and, when that succeeds, copy the
 * inode's private pointer into seq_file->private so the iterator callbacks
 * can reach it.
 *
 * Returns the result of seq_open().
 */
static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
				   const struct seq_operations *ops)
{
	struct seq_file *seq;
	int err;

	err = seq_open(file, ops);
	if (err)
		return err;

	seq = file->private_data;
	seq->private = inode->i_private;
	return 0;
}
static int hctx_state_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
seq_printf(m, "0x%lx\n", hctx->state);
return 0;
}
static int hctx_state_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_state_show, inode->i_private);
}
static const struct file_operations hctx_state_fops = {
.open = hctx_state_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_flags_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
seq_printf(m, "0x%lx\n", hctx->flags);
return 0;
}
static int hctx_flags_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_flags_show, inode->i_private);
}
static const struct file_operations hctx_flags_fops = {
.open = hctx_flags_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * seq_file ->show() shared by the request-list attributes. @v is the list
 * node of a request; one line is emitted with the request's address,
 * command type, flag words and both tags.
 */
static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
{
	struct request *req = list_entry_rq(v);

	seq_printf(m, "%p {.cmd_type=%u, .cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
		   req,
		   req->cmd_type,
		   req->cmd_flags,
		   (unsigned int)req->rq_flags,
		   req->tag,
		   req->internal_tag);
	return 0;
}
/*
 * "dispatch" attribute: walks hctx->dispatch and prints one request per
 * line. hctx->lock is taken in ->start() and released in ->stop().
 */
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
{
	struct blk_mq_hw_ctx *hw = m->private;

	spin_lock(&hw->lock);
	return seq_list_start(&hw->dispatch, *pos);
}

/* Step to the entry following @v on the dispatch list. */
static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct blk_mq_hw_ctx *hw = m->private;

	return seq_list_next(v, &hw->dispatch, pos);
}

/* Release the lock taken by hctx_dispatch_start(). */
static void hctx_dispatch_stop(struct seq_file *m, void *v)
{
	struct blk_mq_hw_ctx *hw = m->private;

	spin_unlock(&hw->lock);
}

static const struct seq_operations hctx_dispatch_seq_ops = {
	.start	= hctx_dispatch_start,
	.next	= hctx_dispatch_next,
	.stop	= hctx_dispatch_stop,
	.show	= blk_mq_debugfs_rq_show,
};

static int hctx_dispatch_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *ops = &hctx_dispatch_seq_ops;

	return blk_mq_debugfs_seq_open(inode, file, ops);
}

static const struct file_operations hctx_dispatch_fops = {
	.open		= hctx_dispatch_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
/* "ctx_map" attribute: dumps hctx->ctx_map through sbitmap_bitmap_show(). */
static int hctx_ctx_map_show(struct seq_file *m, void *v)
{
	struct blk_mq_hw_ctx *hw = m->private;

	sbitmap_bitmap_show(&hw->ctx_map, m);
	return 0;
}

/* Bind the show callback to the hardware context stored in the inode. */
static int hctx_ctx_map_open(struct inode *inode, struct file *file)
{
	void *hw = inode->i_private;

	return single_open(file, hctx_ctx_map_show, hw);
}

static const struct file_operations hctx_ctx_map_fops = {
	.open		= hctx_ctx_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/*
 * Print a summary of @tags: the tag counts, the number of active queues and
 * the regular tag bitmap queue. The reserved bitmap queue is printed only
 * when the tag set has reserved tags.
 */
static void blk_mq_debugfs_tags_show(struct seq_file *m,
				     struct blk_mq_tags *tags)
{
	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
	seq_printf(m, "active_queues=%d\n",
		   atomic_read(&tags->active_queues));

	seq_puts(m, "\nbitmap_tags:\n");
	sbitmap_queue_show(&tags->bitmap_tags, m);

	if (!tags->nr_reserved_tags)
		return;

	seq_puts(m, "\nbreserved_tags:\n");
	sbitmap_queue_show(&tags->breserved_tags, m);
}
static int hctx_tags_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
struct request_queue *q = hctx->queue;
mutex_lock(&q->sysfs_lock);
if (hctx->tags)
blk_mq_debugfs_tags_show(m, hctx->tags);
mutex_unlock(&q->sysfs_lock);
return 0;
}
static int hctx_tags_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_tags_show, inode->i_private);
}
static const struct file_operations hctx_tags_fops = {
.open = hctx_tags_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_tags_bitmap_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
struct request_queue *q = hctx->queue;
mutex_lock(&q->sysfs_lock);
if (hctx->tags)
sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
mutex_unlock(&q->sysfs_lock);
return 0;
}
static int hctx_tags_bitmap_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_tags_bitmap_show, inode->i_private);
}
static const struct file_operations hctx_tags_bitmap_fops = {
.open = hctx_tags_bitmap_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_sched_tags_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
struct request_queue *q = hctx->queue;
mutex_lock(&q->sysfs_lock);
if (hctx->sched_tags)
blk_mq_debugfs_tags_show(m, hctx->sched_tags);
mutex_unlock(&q->sysfs_lock);
return 0;
}
static int hctx_sched_tags_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_sched_tags_show, inode->i_private);
}
static const struct file_operations hctx_sched_tags_fops = {
.open = hctx_sched_tags_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
struct request_queue *q = hctx->queue;
mutex_lock(&q->sysfs_lock);
if (hctx->sched_tags)
sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
mutex_unlock(&q->sysfs_lock);
return 0;
}
static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private);
}
static const struct file_operations hctx_sched_tags_bitmap_fops = {
.open = hctx_sched_tags_bitmap_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_io_poll_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
seq_printf(m, "considered=%lu\n", hctx->poll_considered);
seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
seq_printf(m, "success=%lu\n", hctx->poll_success);
return 0;
}
static int hctx_io_poll_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_io_poll_show, inode->i_private);
}
static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_hw_ctx *hctx = m->private;
hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
return count;
}
static const struct file_operations hctx_io_poll_fops = {
.open = hctx_io_poll_open,
.read = seq_read,
.write = hctx_io_poll_write,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Emit the sample count, mean, minimum and maximum of @stat on the current
 * line. No trailing newline is written; the caller adds it.
 */
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
{
	seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
		   stat->nr_samples,
		   stat->mean,
		   stat->min,
		   stat->max);
}
/*
 * "stats" attribute: collects read and write statistics for the hardware
 * context with blk_hctx_stat_get() and prints one line for each direction.
 */
static int hctx_stats_show(struct seq_file *m, void *v)
{
	struct blk_mq_hw_ctx *hw = m->private;
	struct blk_rq_stat totals[2];

	blk_stat_init(&totals[BLK_STAT_READ]);
	blk_stat_init(&totals[BLK_STAT_WRITE]);
	blk_hctx_stat_get(hw, totals);

	seq_puts(m, "read: ");
	print_stat(m, &totals[BLK_STAT_READ]);
	seq_puts(m, "\n");

	seq_puts(m, "write: ");
	print_stat(m, &totals[BLK_STAT_WRITE]);
	seq_puts(m, "\n");

	return 0;
}

/* Bind the show callback to the hardware context stored in the inode. */
static int hctx_stats_open(struct inode *inode, struct file *file)
{
	void *hw = inode->i_private;

	return single_open(file, hctx_stats_show, hw);
}

/*
 * Any write re-initializes the read and write statistics of every software
 * context visited by hctx_for_each_ctx(). The written bytes are never
 * inspected and the whole @count is reported as consumed.
 */
static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct blk_mq_hw_ctx *hw = seq->private;
	struct blk_mq_ctx *sw;
	int idx;

	hctx_for_each_ctx(hw, sw, idx) {
		blk_stat_init(&sw->stat[BLK_STAT_READ]);
		blk_stat_init(&sw->stat[BLK_STAT_WRITE]);
	}
	return count;
}

static const struct file_operations hctx_stats_fops = {
	.open		= hctx_stats_open,
	.read		= seq_read,
	.write		= hctx_stats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static int hctx_dispatched_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
int i;
seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
unsigned int d = 1U << (i - 1);
seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
}
seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
return 0;
}
static int hctx_dispatched_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_dispatched_show, inode->i_private);
}
static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_hw_ctx *hctx = m->private;
int i;
for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
hctx->dispatched[i] = 0;
return count;
}
static const struct file_operations hctx_dispatched_fops = {
.open = hctx_dispatched_open,
.read = seq_read,
.write = hctx_dispatched_write,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_queued_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
seq_printf(m, "%lu\n", hctx->queued);
return 0;
}
static int hctx_queued_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_queued_show, inode->i_private);
}
static ssize_t hctx_queued_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_hw_ctx *hctx = m->private;
hctx->queued = 0;
return count;
}
static const struct file_operations hctx_queued_fops = {
.open = hctx_queued_open,
.read = seq_read,
.write = hctx_queued_write,
.llseek = seq_lseek,
.release = single_release,
};
static int hctx_run_show(struct seq_file *m, void *v)
{
struct blk_mq_hw_ctx *hctx = m->private;
seq_printf(m, "%lu\n", hctx->run);
return 0;
}
static int hctx_run_open(struct inode *inode, struct file *file)
{
return single_open(file, hctx_run_show, inode->i_private);
}
static ssize_t hctx_run_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_hw_ctx *hctx = m->private;
hctx->run = 0;
return count;
}
static const struct file_operations hctx_run_fops = {
.open = hctx_run_open,
.read = seq_read,
.write = hctx_run_write,
.llseek = seq_lseek,
.release = single_release,
};
/* "active" attribute: prints the current value of hctx->nr_active. */
static int hctx_active_show(struct seq_file *m, void *v)
{
	struct blk_mq_hw_ctx *hw = m->private;
	int active = atomic_read(&hw->nr_active);

	seq_printf(m, "%d\n", active);
	return 0;
}

/* Bind the show callback to the hardware context stored in the inode. */
static int hctx_active_open(struct inode *inode, struct file *file)
{
	void *hw = inode->i_private;

	return single_open(file, hctx_active_show, hw);
}

static const struct file_operations hctx_active_fops = {
	.open		= hctx_active_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/*
 * "rq_list" attribute: walks ctx->rq_list and prints one request per line.
 * ctx->lock is taken in ->start() and released in ->stop().
 */
static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
{
	struct blk_mq_ctx *sw = m->private;

	spin_lock(&sw->lock);
	return seq_list_start(&sw->rq_list, *pos);
}

/* Step to the entry following @v on the software queue's request list. */
static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct blk_mq_ctx *sw = m->private;

	return seq_list_next(v, &sw->rq_list, pos);
}

/* Release the lock taken by ctx_rq_list_start(). */
static void ctx_rq_list_stop(struct seq_file *m, void *v)
{
	struct blk_mq_ctx *sw = m->private;

	spin_unlock(&sw->lock);
}

static const struct seq_operations ctx_rq_list_seq_ops = {
	.start	= ctx_rq_list_start,
	.next	= ctx_rq_list_next,
	.stop	= ctx_rq_list_stop,
	.show	= blk_mq_debugfs_rq_show,
};

static int ctx_rq_list_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *ops = &ctx_rq_list_seq_ops;

	return blk_mq_debugfs_seq_open(inode, file, ops);
}

static const struct file_operations ctx_rq_list_fops = {
	.open		= ctx_rq_list_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
static int ctx_dispatched_show(struct seq_file *m, void *v)
{
struct blk_mq_ctx *ctx = m->private;
seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]);
return 0;
}
static int ctx_dispatched_open(struct inode *inode, struct file *file)
{
return single_open(file, ctx_dispatched_show, inode->i_private);
}
static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_ctx *ctx = m->private;
ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0;
return count;
}
static const struct file_operations ctx_dispatched_fops = {
.open = ctx_dispatched_open,
.read = seq_read,
.write = ctx_dispatched_write,
.llseek = seq_lseek,
.release = single_release,
};
static int ctx_merged_show(struct seq_file *m, void *v)
{
struct blk_mq_ctx *ctx = m->private;
seq_printf(m, "%lu\n", ctx->rq_merged);
return 0;
}
static int ctx_merged_open(struct inode *inode, struct file *file)
{
return single_open(file, ctx_merged_show, inode->i_private);
}
static ssize_t ctx_merged_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_ctx *ctx = m->private;
ctx->rq_merged = 0;
return count;
}
static const struct file_operations ctx_merged_fops = {
.open = ctx_merged_open,
.read = seq_read,
.write = ctx_merged_write,
.llseek = seq_lseek,
.release = single_release,
};
static int ctx_completed_show(struct seq_file *m, void *v)
{
struct blk_mq_ctx *ctx = m->private;
seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]);
return 0;
}
static int ctx_completed_open(struct inode *inode, struct file *file)
{
return single_open(file, ctx_completed_show, inode->i_private);
}
static ssize_t ctx_completed_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct seq_file *m = file->private_data;
struct blk_mq_ctx *ctx = m->private;
ctx->rq_completed[0] = ctx->rq_completed[1] = 0;
return count;
}
static const struct file_operations ctx_completed_fops = {
.open = ctx_completed_open,
.read = seq_read,
.write = ctx_completed_write,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * Files created inside every hardware-context directory. Entries with mode
 * 0600 have a ->write handler that resets the corresponding counters.
 */
static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
	{ .name = "state",		.mode = 0400, .fops = &hctx_state_fops },
	{ .name = "flags",		.mode = 0400, .fops = &hctx_flags_fops },
	{ .name = "dispatch",		.mode = 0400, .fops = &hctx_dispatch_fops },
	{ .name = "ctx_map",		.mode = 0400, .fops = &hctx_ctx_map_fops },
	{ .name = "tags",		.mode = 0400, .fops = &hctx_tags_fops },
	{ .name = "tags_bitmap",	.mode = 0400, .fops = &hctx_tags_bitmap_fops },
	{ .name = "sched_tags",		.mode = 0400, .fops = &hctx_sched_tags_fops },
	{ .name = "sched_tags_bitmap",	.mode = 0400, .fops = &hctx_sched_tags_bitmap_fops },
	{ .name = "io_poll",		.mode = 0600, .fops = &hctx_io_poll_fops },
	{ .name = "stats",		.mode = 0600, .fops = &hctx_stats_fops },
	{ .name = "dispatched",		.mode = 0600, .fops = &hctx_dispatched_fops },
	{ .name = "queued",		.mode = 0600, .fops = &hctx_queued_fops },
	{ .name = "run",		.mode = 0600, .fops = &hctx_run_fops },
	{ .name = "active",		.mode = 0400, .fops = &hctx_active_fops },
};
/*
 * Files created inside every per-CPU software-context directory. Entries
 * with mode 0600 have a ->write handler that resets the counters.
 */
static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
	{ .name = "rq_list",	.mode = 0400, .fops = &ctx_rq_list_fops },
	{ .name = "dispatched",	.mode = 0600, .fops = &ctx_dispatched_fops },
	{ .name = "merged",	.mode = 0600, .fops = &ctx_merged_fops },
	{ .name = "completed",	.mode = 0600, .fops = &ctx_completed_fops },
};
/*
 * Create the debugfs directory @name for @q under the block root and
 * populate it with the hardware-context hierarchy.
 *
 * Returns 0 on success, -ENOENT when the block root directory does not
 * exist, or -ENOMEM when the queue directory or any part of the hierarchy
 * could not be created. On the -ENOMEM path everything created so far is
 * torn down again.
 */
int blk_mq_debugfs_register(struct request_queue *q, const char *name)
{
	if (!block_debugfs_root)
		return -ENOENT;

	q->debugfs_dir = debugfs_create_dir(name, block_debugfs_root);
	if (q->debugfs_dir && !blk_mq_debugfs_register_hctxs(q))
		return 0;

	blk_mq_debugfs_unregister(q);
	return -ENOMEM;
}
/*
 * Remove the queue's whole debugfs directory tree and forget both cached
 * dentries (the queue directory and its "mq" subdirectory).
 */
void blk_mq_debugfs_unregister(struct request_queue *q)
{
	struct dentry *top = q->debugfs_dir;

	debugfs_remove_recursive(top);
	q->mq_debugfs_dir = NULL;
	q->debugfs_dir = NULL;
}
static int blk_mq_debugfs_register_ctx(struct request_queue *q,
struct blk_mq_ctx *ctx,
struct dentry *hctx_dir)
{
struct dentry *ctx_dir;
char name[20];
int i;
snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
ctx_dir = debugfs_create_dir(name, hctx_dir);
if (!ctx_dir)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_ctx_attrs); i++) {
const struct blk_mq_debugfs_attr *attr;
attr = &blk_mq_debugfs_ctx_attrs[i];
if (!debugfs_create_file(attr->name, attr->mode, ctx_dir, ctx,
attr->fops))
return -ENOMEM;
}
return 0;
}
static int blk_mq_debugfs_register_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_ctx *ctx;
struct dentry *hctx_dir;
char name[20];
int i;
snprintf(name, sizeof(name), "%u", hctx->queue_num);
hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir);
if (!hctx_dir)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_hctx_attrs); i++) {
const struct blk_mq_debugfs_attr *attr;
attr = &blk_mq_debugfs_hctx_attrs[i];
if (!debugfs_create_file(attr->name, attr->mode, hctx_dir, hctx,
attr->fops))
return -ENOMEM;
}
hctx_for_each_ctx(hctx, ctx, i) {
if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir))
return -ENOMEM;
}
return 0;
}
/*
 * Create the "mq" directory inside the queue's debugfs directory and
 * register every hardware context of @q below it.
 *
 * Returns 0 on success, -ENOENT when the queue has no debugfs directory,
 * or -ENOMEM on failure, in which case the "mq" subtree is removed again.
 */
int blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hw;
	int idx;

	if (!q->debugfs_dir)
		return -ENOENT;

	q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir);
	if (!q->mq_debugfs_dir)
		goto fail;

	queue_for_each_hw_ctx(q, hw, idx) {
		if (blk_mq_debugfs_register_hctx(q, hw) != 0)
			goto fail;
	}

	return 0;

fail:
	blk_mq_debugfs_unregister_hctxs(q);
	return -ENOMEM;
}
/*
 * Remove the queue's "mq" debugfs subtree and clear the cached dentry. The
 * queue's top-level debugfs directory is left in place.
 */
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
{
	struct dentry *mq_dir = q->mq_debugfs_dir;

	debugfs_remove_recursive(mq_dir);
	q->mq_debugfs_dir = NULL;
}
/*
 * Create the top-level "block" debugfs directory and remember it in
 * block_debugfs_root. The result is stored unchecked;
 * blk_mq_debugfs_register() refuses to run while it is NULL.
 */
void blk_mq_debugfs_init(void)
{
	struct dentry *root = debugfs_create_dir("block", NULL);

	block_debugfs_root = root;
}
/*
* blk-mq scheduling framework
*
* Copyright (C) 2016 Jens Axboe
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"
/*
 * Release the scheduler data of every hardware queue of @q. For a queue
 * that has sched_data, the optional @exit callback runs first; the data is
 * then freed and the pointer cleared.
 */
void blk_mq_sched_free_hctx_data(struct request_queue *q,
				 void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hw;
	int idx;

	queue_for_each_hw_ctx(q, hw, idx) {
		if (hw->sched_data && exit)
			exit(hw);

		kfree(hw->sched_data);
		hw->sched_data = NULL;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
/*
 * Allocate @size bytes of scheduler data for every hardware queue of @q on
 * that queue's NUMA node, then run the optional @init callback on it.
 *
 * Returns 0 on success. On failure returns -ENOMEM (allocation) or the
 * error from @init, after releasing all scheduler data through
 * blk_mq_sched_free_hctx_data() with @exit.
 */
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
				int (*init)(struct blk_mq_hw_ctx *),
				void (*exit)(struct blk_mq_hw_ctx *))
{
	struct blk_mq_hw_ctx *hw;
	int err;
	int idx;

	queue_for_each_hw_ctx(q, hw, idx) {
		hw->sched_data = kmalloc_node(size, GFP_KERNEL, hw->numa_node);
		if (!hw->sched_data) {
			err = -ENOMEM;
			goto unwind;
		}

		err = init ? init(hw) : 0;
		if (err) {
			/*
			 * exit() must never see half-initialized data, so
			 * drop this queue's buffer here. init() is expected
			 * to have undone its own partial work.
			 */
			kfree(hw->sched_data);
			hw->sched_data = NULL;
			goto unwind;
		}
	}

	return 0;

unwind:
	blk_mq_sched_free_hctx_data(q, exit);
	return err;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
/*
 * Attach an io_cq for (@ioc, @q) to @rq. The icq is looked up under the
 * queue lock and created with GFP_ATOMIC if it is missing. If the
 * scheduler accepts the request (blk_mq_sched_get_rq_priv() returns 0) the
 * request is flagged RQF_ELVPRIV and a reference on the io_context is
 * taken; otherwise rq->elv.icq is reset to NULL.
 *
 * Returns silently, leaving @rq untouched, when no icq can be created.
 */
static void __blk_mq_sched_assign_ioc(struct request_queue *q,
				      struct request *rq, struct io_context *ioc)
{
	struct io_cq *cq;

	spin_lock_irq(q->queue_lock);
	cq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(q->queue_lock);

	if (!cq)
		cq = ioc_create_icq(ioc, q, GFP_ATOMIC);
	if (!cq)
		return;

	rq->elv.icq = cq;
	if (blk_mq_sched_get_rq_priv(q, rq)) {
		rq->elv.icq = NULL;
		return;
	}

	rq->rq_flags |= RQF_ELVPRIV;
	get_io_context(cq->ioc);
}
/*
 * Pick the io_context for @bio with rq_ioc() and, if there is one, attach
 * the matching io_cq to @rq. Does nothing when rq_ioc() returns NULL.
 */
static void blk_mq_sched_assign_ioc(struct request_queue *q,
				    struct request *rq, struct bio *bio)
{
	struct io_context *ioc = rq_ioc(bio);

	if (!ioc)
		return;

	__blk_mq_sched_assign_ioc(q, rq, ioc);
}
/*
 * Allocate a request for @op on @q, going through the I/O scheduler when
 * one is attached.
 *
 * A live queue reference is taken first and @data is filled in with the
 * current software context and the hardware queue mapped from its CPU.
 * Without a scheduler the request comes from __blk_mq_alloc_request() and
 * is recorded in the hardware queue's tags->rqs[] slot for its tag. With a
 * scheduler, BLK_MQ_REQ_INTERNAL is set in data->flags; flush requests, and
 * schedulers without a ->get_request hook, still allocate through
 * __blk_mq_alloc_request(), while other requests come from ->get_request
 * and are flagged RQF_QUEUED.
 *
 * For non-flush requests rq->elv.icq is cleared and, if the scheduler type
 * has an icq cache, an io_cq is assigned.
 *
 * Returns the request, or NULL after dropping the queue reference again.
 * The software context obtained here is not released by this function.
 */
struct request *blk_mq_sched_get_request(struct request_queue *q,
					 struct bio *bio,
					 unsigned int op,
					 struct blk_mq_alloc_data *data)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_hw_ctx *hw;
	struct blk_mq_ctx *sw;
	struct request *rq;

	blk_queue_enter_live(q);
	sw = blk_mq_get_ctx(q);
	hw = blk_mq_map_queue(q, sw->cpu);

	blk_mq_set_alloc_data(data, q, data->flags, sw, hw);

	if (!e) {
		rq = __blk_mq_alloc_request(data, op);
		if (rq)
			data->hctx->tags->rqs[rq->tag] = rq;
	} else {
		data->flags |= BLK_MQ_REQ_INTERNAL;

		/*
		 * Flushes bypass the scheduler and head straight for the
		 * dispatch list, so they use the plain allocator.
		 */
		if (op_is_flush(op) || !e->type->ops.mq.get_request) {
			rq = __blk_mq_alloc_request(data, op);
		} else {
			rq = e->type->ops.mq.get_request(q, op, data);
			if (rq)
				rq->rq_flags |= RQF_QUEUED;
		}
	}

	if (!rq) {
		blk_queue_exit(q);
		return NULL;
	}

	if (!op_is_flush(op)) {
		rq->elv.icq = NULL;
		if (e && e->type->icq_cache)
			blk_mq_sched_assign_ioc(q, rq, bio);
	}

	data->hctx->queued++;
	return rq;
}
/*
 * Release @rq. Scheduler-private state is dropped first if the request
 * carries RQF_ELVPRIV, including the io_context reference held through its
 * icq. The request is then handed to the scheduler's ->put_request hook if
 * it was flagged RQF_QUEUED and such a hook exists; otherwise it is
 * finished with blk_mq_finish_request().
 */
void blk_mq_sched_put_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (rq->rq_flags & RQF_ELVPRIV) {
		blk_mq_sched_put_rq_priv(q, rq);
		if (rq->elv.icq) {
			put_io_context(rq->elv.icq->ioc);
			rq->elv.icq = NULL;
		}
	}

	if (e && (rq->rq_flags & RQF_QUEUED) && e->type->ops.mq.put_request) {
		e->type->ops.mq.put_request(rq);
		return;
	}

	blk_mq_finish_request(rq);
}
/*
 * Run one dispatch pass for @hctx.  Requests already parked on
 * hctx->dispatch are issued first; only when none were parked is new
 * work pulled from the software queues or from the elevator.
 */
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
	struct elevator_queue *e = hctx->queue->elevator;
	LIST_HEAD(rq_list);

	if (unlikely(blk_mq_hctx_stopped(hctx)))
		return;

	hctx->run++;

	/*
	 * Leftovers from an earlier pass go first so dispatch stays fair.
	 * The unlocked emptiness probe is repeated under hctx->lock.
	 */
	if (!list_empty_careful(&hctx->dispatch)) {
		spin_lock(&hctx->lock);
		if (!list_empty(&hctx->dispatch))
			list_splice_init(&hctx->dispatch, &rq_list);
		spin_unlock(&hctx->lock);
	}

	/*
	 * Residual requests present: issue just those and flag the hw
	 * queue for a restart.  Once requests leave the IO scheduler they
	 * can no longer be merged or sorted, so with a low device queue
	 * depth it is better to leave them there as long as possible.
	 */
	if (!list_empty(&rq_list)) {
		blk_mq_sched_mark_restart(hctx);
		blk_mq_dispatch_rq_list(hctx, &rq_list);
		return;
	}

	/* No elevator dispatch hook: drain the software queues directly */
	if (!e || !e->type->ops.mq.dispatch_request) {
		blk_mq_flush_busy_ctxs(hctx, &rq_list);
		blk_mq_dispatch_rq_list(hctx, &rq_list);
		return;
	}

	/*
	 * Take one request at a time from the elevator and stop as soon
	 * as it has none left or blk_mq_dispatch_rq_list() returns false.
	 */
	for (;;) {
		struct request *rq = e->type->ops.mq.dispatch_request(hctx);

		if (!rq)
			break;
		list_add(&rq->queuelist, &rq_list);
		if (!blk_mq_dispatch_rq_list(hctx, &rq_list))
			break;
	}
}
/*
 * Keep calling @get_rq on @hctx and append every request it returns to
 * the tail of @rq_list, stopping at the first NULL.
 */
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
				   struct list_head *rq_list,
				   struct request *(*get_rq)(struct blk_mq_hw_ctx *))
{
	struct request *next;

	for (next = get_rq(hctx); next; next = get_rq(hctx))
		list_add_tail(&next->queuelist, rq_list);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
/*
 * Try to merge @bio into a request the elevator already holds.
 *
 * elv_merge() picks the candidate request and the merge direction.  The
 * scheduler may veto the merge; if the bio is merged, a follow-up
 * request-to-request merge is attempted, and the elevator is notified of
 * the changed request only when that follow-up did not happen.
 *
 * Returns true if @bio was merged, false otherwise.
 */
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
{
	struct request *target;
	int where;

	where = elv_merge(q, &target, bio);

	if (where == ELEVATOR_BACK_MERGE) {
		if (!blk_mq_sched_allow_merge(q, target, bio))
			return false;
		if (!bio_attempt_back_merge(q, target, bio))
			return false;
		if (!attempt_back_merge(q, target))
			elv_merged_request(q, target, where);
		return true;
	}

	if (where == ELEVATOR_FRONT_MERGE) {
		if (!blk_mq_sched_allow_merge(q, target, bio))
			return false;
		if (!bio_attempt_front_merge(q, target, bio))
			return false;
		if (!attempt_front_merge(q, target))
			elv_merged_request(q, target, where);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
/*
 * Offer @bio to the elevator's bio_merge hook, using the hardware queue
 * mapped from the current software context.
 *
 * q->elevator is dereferenced unconditionally, so the caller must have
 * checked that an elevator is attached.  Returns false when the
 * elevator has no bio_merge hook.
 */
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;

	if (!e->type->ops.mq.bio_merge)
		return false;

	/* The ctx is only needed to look up the hctx; release it right away */
	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, ctx->cpu);
	blk_mq_put_ctx(ctx);

	return e->type->ops.mq.bio_merge(hctx, bio);
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
/* Emit the block_rq_insert trace event for @rq on its own queue. */
void blk_mq_sched_request_inserted(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_insert(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
/*
 * Decide whether @rq skips the scheduler.
 *
 * A request that already has a real tag (rq->tag != -1) is put at the
 * head of hctx->dispatch and true is returned.  Otherwise the request
 * is marked RQF_SORTED and false is returned so the caller inserts it
 * through the scheduler.
 */
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	if (rq->tag != -1) {
		spin_lock(&hctx->lock);
		list_add(&rq->queuelist, &hctx->dispatch);
		spin_unlock(&hctx->lock);
		return true;
	}

	rq->rq_flags |= RQF_SORTED;
	return false;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
/*
 * If @hctx was flagged as needing a restart, clear the flag and kick
 * the hardware queue asynchronously when it still has pending work.
 */
static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (!blk_mq_hctx_has_pending(hctx))
		return;

	blk_mq_run_hw_queue(hctx, true);
}
/*
 * Restart hardware queues that were marked for restart.
 *
 * Without BLK_MQ_F_TAG_SHARED only @hctx itself is considered.  With
 * shared tags, every hardware queue of the request queue is considered,
 * but only if the queue-wide QUEUE_FLAG_RESTART bit is set; that bit is
 * cleared before the queues are walked.
 */
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q;
	unsigned int i;

	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
		blk_mq_sched_restart_hctx(hctx);
		return;
	}

	q = hctx->queue;
	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
		return;

	clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);

	/* @hctx is reused as the iteration cursor from here on */
	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_sched_restart_hctx(hctx);
}
/*
 * Queue a flush/fua request.  A driver tag is required first; when one
 * cannot be obtained the request is punted to the requeue list, and the
 * requeue work will call back in from a context where blocking is safe.
 */
static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
				      struct request *rq, bool can_block)
{
	/* blk_mq_get_driver_tag() receives &hctx and may update it */
	if (!blk_mq_get_driver_tag(rq, &hctx, can_block)) {
		blk_mq_add_to_requeue_list(rq, true, true);
		return;
	}

	blk_insert_flush(rq);
	blk_mq_run_hw_queue(hctx, true);
}
/*
 * Insert a single request.
 *
 * @rq:        request to insert
 * @at_head:   passed through to the elevator / software-queue insert
 * @run_queue: run the hardware queue after inserting
 * @async:     passed to blk_mq_run_hw_queue() when @run_queue is set
 * @can_block: forwarded to the flush insert path
 *
 * Flush requests that do not yet hold a tag take the dedicated flush
 * path and return early.  Everything else goes to the elevator's
 * insert_requests hook when one exists, otherwise onto the software
 * queue under ctx->lock.
 */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
				 bool run_queue, bool async, bool can_block)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
		blk_mq_sched_insert_flush(hctx, rq, can_block);
		return;
	}

	if (!e || !e->type->ops.mq.insert_requests) {
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq, at_head);
		spin_unlock(&ctx->lock);
	} else {
		/* The hook takes a list, so wrap the request in one */
		LIST_HEAD(list);

		list_add(&rq->queuelist, &list);
		e->type->ops.mq.insert_requests(hctx, &list, at_head);
	}

	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}
/*
 * Insert a list of requests for software context @ctx and then run the
 * mapped hardware queue.  The elevator's insert_requests hook is used
 * when present (never at head); otherwise the requests go onto the
 * software queue via blk_mq_insert_requests().
 */
void blk_mq_sched_insert_requests(struct request_queue *q,
				  struct blk_mq_ctx *ctx,
				  struct list_head *list, bool run_queue_async)
{
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
	struct elevator_queue *e = hctx->queue->elevator;

	if (!e || !e->type->ops.mq.insert_requests)
		blk_mq_insert_requests(hctx, ctx, list);
	else
		e->type->ops.mq.insert_requests(hctx, list, false);

	blk_mq_run_hw_queue(hctx, run_queue_async);
}
/*
 * Free the scheduler tag map of @hctx, requests first and then the map
 * itself, and clear the pointer.  Does nothing when no scheduler tags
 * are attached.
 */
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	if (!hctx->sched_tags)
		return;

	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
	blk_mq_free_rq_map(hctx->sched_tags);
	hctx->sched_tags = NULL;
}
/*
 * Allocate scheduler tags and their requests for every hardware queue
 * of @q, in preparation for attaching an IO scheduler.
 *
 * Returns 0 on success.  On failure, everything allocated so far is
 * freed and a negative errno is returned (-ENOMEM when a tag map could
 * not be allocated, otherwise the error from blk_mq_alloc_rqs()).
 */
int blk_mq_sched_setup(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int ret = 0;
	int i;

	/*
	 * Per-hw-queue depth.  Twice BLKDEV_MAX_RQ is used because, unlike
	 * the old code, requests are not split into sync and async pools.
	 */
	q->nr_requests = 2 * BLKDEV_MAX_RQ;

	/*
	 * Allocate everything up front so that a failure can be unwound
	 * cleanly and the caller can fall back.
	 */
	queue_for_each_hw_ctx(q, hctx, i) {
		hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
		if (!hctx->sched_tags) {
			ret = -ENOMEM;
			break;
		}

		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
		if (ret)
			break;
	}

	if (!ret)
		return 0;

	/* Unwind: release whatever tag maps did get allocated */
	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->sched_tags)
			blk_mq_sched_free_tags(set, hctx, i);
	}

	return ret;
}
void blk_mq_sched_teardown(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_sched_free_tags(set, hctx, i);
}
/*
 * Attach an elevator to @q unless the build configuration selects no
 * scheduler for this kind of queue.
 *
 * Returns 0 when no elevator is attached because of the configuration,
 * otherwise the return value of elevator_init().
 */
int blk_mq_sched_init(struct request_queue *q)
{
int ret;
/* Single hardware queue and CONFIG_DEFAULT_SQ_NONE: nothing to set up */
#if defined(CONFIG_DEFAULT_SQ_NONE)
if (q->nr_hw_queues == 1)
return 0;
#endif
/* Multiple hardware queues and CONFIG_DEFAULT_MQ_NONE: nothing to set up */
#if defined(CONFIG_DEFAULT_MQ_NONE)
if (q->nr_hw_queues > 1)
return 0;
#endif
/*
 * elevator_init() is called with q->sysfs_lock held.
 * NOTE(review): the NULL name presumably selects the default elevator -
 * confirm against elevator_init().
 */
mutex_lock(&q->sysfs_lock);
ret = elevator_init(q, NULL);
mutex_unlock(&q->sysfs_lock);
return ret;
}
#ifndef BLK_MQ_SCHED_H
#define BLK_MQ_SCHED_H
#include "blk-mq.h"
#include "blk-mq-tag.h"
/*
 * Per-hctx scheduler data helpers.
 * NOTE(review): definitions are not visible here; semantics of @init and
 * @exit should be confirmed against blk-mq-sched.c.
 */
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
int (*init)(struct blk_mq_hw_ctx *),
void (*exit)(struct blk_mq_hw_ctx *));
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
/* Request allocation and release */
struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
void blk_mq_sched_put_request(struct request *rq);
/* Insertion bookkeeping and merge helpers */
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
/* Restart hardware queues previously marked with blk_mq_sched_mark_restart() */
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
/* Request insertion: single request, or a list for one software context */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async, bool can_block);
void blk_mq_sched_insert_requests(struct request_queue *q,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async);
/* Dispatch */
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
struct list_head *rq_list,
struct request *(*get_rq)(struct blk_mq_hw_ctx *));
/* Scheduler tag setup/teardown and elevator attachment */
int blk_mq_sched_setup(struct request_queue *q);
void blk_mq_sched_teardown(struct request_queue *q);
int blk_mq_sched_init(struct request_queue *q);
/*
 * Fast-path filter in front of __blk_mq_sched_bio_merge(): returns
 * false without calling it when no elevator is attached, merging is
 * disabled on the queue, or the bio itself is not mergeable.
 */
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	if (!q->elevator)
		return false;

	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return false;

	return __blk_mq_sched_bio_merge(q, bio);
}
/*
 * Call the elevator's get_rq_priv hook for @rq and return its result.
 * Returns 0 when no elevator is attached or it has no such hook.
 */
static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
					   struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (!e || !e->type->ops.mq.get_rq_priv)
		return 0;

	return e->type->ops.mq.get_rq_priv(q, rq);
}
/* Call the elevator's put_rq_priv hook for @rq, if there is one. */
static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
					    struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (!e || !e->type->ops.mq.put_rq_priv)
		return;

	e->type->ops.mq.put_rq_priv(q, rq);
}
/*
 * Ask the elevator whether @bio may be merged into @rq.  Merging is
 * allowed by default when no elevator or no allow_merge hook exists.
 */
static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
			 struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (!e || !e->type->ops.mq.allow_merge)
		return true;

	return e->type->ops.mq.allow_merge(q, rq, bio);
}
/*
 * Completion hook: notify the elevator (if it has a completed_request
 * hook) and then return the request's internal tag to the scheduler
 * tag map of @hctx.  A request without an internal tag is a bug here.
 */
static inline void
blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct elevator_queue *elv = hctx->queue->elevator;

	if (elv && elv->type->ops.mq.completed_request)
		elv->type->ops.mq.completed_request(hctx, rq);

	BUG_ON(rq->internal_tag == -1);

	blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
}
/* Call the elevator's started_request hook for @rq, if there is one. */
static inline void blk_mq_sched_started_request(struct request *rq)
{
	struct elevator_queue *e = rq->q->elevator;

	if (!e || !e->type->ops.mq.started_request)
		return;

	e->type->ops.mq.started_request(rq);
}
/* Call the elevator's requeue_request hook for @rq, if there is one. */
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
	struct elevator_queue *e = rq->q->elevator;

	if (!e || !e->type->ops.mq.requeue_request)
		return;

	e->type->ops.mq.requeue_request(rq);
}
/*
 * Return the result of the elevator's has_work hook for @hctx, or
 * false when no elevator or no such hook is present.
 */
static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct elevator_queue *e = hctx->queue->elevator;

	if (!e || !e->type->ops.mq.has_work)
		return false;

	return e->type->ops.mq.has_work(hctx);
}
/*
 * Flag @hctx as needing a restart.  When the hctx uses shared tags, the
 * queue-wide QUEUE_FLAG_RESTART bit is set as well.  Each bit is tested
 * before it is written, so an already-set bit is left untouched.
 */
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q;

	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return;

	q = hctx->queue;
	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
}
/* Report whether BLK_MQ_S_SCHED_RESTART is currently set on @hctx. */
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
#endif
......@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
return res;
}
/*
 * sysfs show: write ctx->rq_dispatched[1] followed by
 * ctx->rq_dispatched[0] into @page and return the number of characters
 * written.
 */
static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
ctx->rq_dispatched[0]);
}
/* sysfs show: write ctx->rq_merged into @page; returns characters written. */
static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu\n", ctx->rq_merged);
}
/*
 * sysfs show: write ctx->rq_completed[1] followed by
 * ctx->rq_completed[0] into @page and return the number of characters
 * written.
 */
static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
ctx->rq_completed[0]);
}
/*
 * Format a request list into a sysfs page.
 *
 * Writes "@msg:" on the first line, then one "\t<pointer>" line per
 * request on @list.  Output is capped at PAGE_SIZE - 1 characters; when
 * the next entry would not fit, a "\t...\n" marker is emitted instead
 * and the walk stops.  Returns the number of characters written.
 */
static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
{
struct request *rq;
int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
list_for_each_entry(rq, list, queuelist) {
/*
 * Space budget for one entry: apparently two hex digits per
 * pointer byte plus the tab and the newline.
 */
const int rq_len = 2 * sizeof(rq) + 2;
/* if the output will be truncated */
if (PAGE_SIZE - 1 < len + rq_len) {
/* backspacing if it can't hold '\t...\n' */
if (PAGE_SIZE - 1 < len + 5)
len -= rq_len;
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t...\n");
break;
}
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t%p\n", rq);
}
return len;
}
/*
 * sysfs show: list the requests queued on software context @ctx.
 * ctx->lock is held while ctx->rq_list is walked.
 */
static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
{
	ssize_t len;

	spin_lock(&ctx->lock);
	len = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
	spin_unlock(&ctx->lock);

	return len;
}
/*
 * sysfs show: write the three polling counters of @hctx
 * (poll_considered, poll_invoked, poll_success) into @page and return
 * the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n",
hctx->poll_considered, hctx->poll_invoked,
hctx->poll_success);
}
/*
 * sysfs store: any write resets the three polling counters of @hctx to
 * zero.  The written data in @page is ignored; @size is returned so the
 * whole write is reported as consumed.
 */
static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx,
					  const char *page, size_t size)
{
	hctx->poll_success = 0;
	hctx->poll_invoked = 0;
	hctx->poll_considered = 0;

	return size;
}
/* sysfs show: write hctx->queued into @page; returns characters written. */
static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
return sprintf(page, "%lu\n", hctx->queued);
}
/* sysfs show: write hctx->run into @page; returns characters written. */
static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%lu\n", hctx->run);
}
/*
 * sysfs show: print the hctx->dispatched[] histogram, one bucket per
 * line.  Bucket 0 is labelled 0, bucket i is labelled 2^(i-1), and the
 * final bucket carries a trailing '+' on its label.  Returns the number
 * of characters written.
 */
static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
					       char *page)
{
	ssize_t len;
	int i;

	len = sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);

	for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++)
		len += sprintf(page + len, "%8u\t%lu\n", 1U << (i - 1),
			       hctx->dispatched[i]);

	/* i now indexes the last bucket */
	len += sprintf(page + len, "%8u+\t%lu\n", 1U << (i - 1),
		       hctx->dispatched[i]);

	return len;
}
static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
ssize_t ret;
spin_lock(&hctx->lock);
ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
spin_unlock(&hctx->lock);
return ret;
return sprintf(page, "%u\n", hctx->tags->nr_tags);
}
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
return blk_mq_tag_sysfs_show(hctx->tags, page);
}
static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
}
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
......@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret;
}
/*
 * Re-initialise the read and write statistics of every software context
 * mapped to @hctx.
 */
static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_ctx *sw_ctx;
	unsigned int idx;

	hctx_for_each_ctx(hctx, sw_ctx, idx) {
		blk_stat_init(&sw_ctx->stat[BLK_STAT_READ]);
		blk_stat_init(&sw_ctx->stat[BLK_STAT_WRITE]);
	}
}
/*
 * sysfs store: any write clears the statistics of @hctx.  The written
 * data in @page is ignored; @count is returned so the whole write is
 * reported as consumed.
 */
static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
const char *page, size_t count)
{
blk_mq_stat_clear(hctx);
return count;
}
/*
 * Format one statistics record into @page as a single line prefixed
 * with @pre; returns the number of characters written.
 *
 * NOTE(review): nr_samples is cast to (long long) but printed with
 * %llu; the field types of struct blk_rq_stat are not visible here, so
 * confirm the specifiers match the intended signedness.
 */
static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
{
return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
pre, (long long) stat->nr_samples,
(long long) stat->mean, (long long) stat->min,
(long long) stat->max);
}
/*
 * sysfs show: gather the read and write statistics of @hctx into local
 * records and print them as two lines, read first.  Returns the total
 * number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	struct blk_rq_stat stat[2];
	ssize_t len;

	/* Start from freshly initialised records before collecting */
	blk_stat_init(&stat[BLK_STAT_READ]);
	blk_stat_init(&stat[BLK_STAT_WRITE]);
	blk_hctx_stat_get(hctx, stat);

	len = print_stat(page, &stat[BLK_STAT_READ], "read :");
	len += print_stat(page + len, &stat[BLK_STAT_WRITE], "write:");

	return len;
}
/*
 * Read-only (S_IRUGO) sysfs attributes of a software context, each
 * bound to the show handler of the same name.
 */
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
.attr = {.name = "merged", .mode = S_IRUGO },
.show = blk_mq_sysfs_merged_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
.attr = {.name = "completed", .mode = S_IRUGO },
.show = blk_mq_sysfs_completed_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
.attr = {.name = "rq_list", .mode = S_IRUGO },
.show = blk_mq_sysfs_rq_list_show,
};
/* NULL-terminated list of the software-context attributes above */
static struct attribute *default_ctx_attrs[] = {
&blk_mq_sysfs_dispatched.attr,
&blk_mq_sysfs_merged.attr,
&blk_mq_sysfs_completed.attr,
&blk_mq_sysfs_rq_list.attr,
NULL,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = {
.attr = {.name = "queued", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_queued_show,
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
.attr = {.name = "nr_tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_nr_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = {
.attr = {.name = "run", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_run_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_dispatched_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
.attr = {.name = "active", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_active_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show,
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
/* Read-only hardware-context attribute "cpu_list" */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
.attr = {.name = "cpu_list", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_cpus_show,
};
/* "io_poll": readable by all, writable by owner; a write resets the counters */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
.store = blk_mq_hw_sysfs_poll_store,
};
/* "stats": readable by all, writable by owner; a write clears the statistics */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
.show = blk_mq_hw_sysfs_stat_show,
.store = blk_mq_hw_sysfs_stat_store,
};
static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
&blk_mq_hw_sysfs_run.attr,
&blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_tags.attr,
&blk_mq_hw_sysfs_nr_tags.attr,
&blk_mq_hw_sysfs_nr_reserved_tags.attr,
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
&blk_mq_hw_sysfs_stat.attr,
NULL,
};
......@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
kobject_put(&hctx->kobj);
}
blk_mq_debugfs_unregister(q);
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj);
kobject_put(&q->mq_kobj);
......@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
kobject_uevent(&q->mq_kobj, KOBJ_ADD);
blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
if (ret)
......@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
if (!q->mq_sysfs_init_done)
return;
blk_mq_debugfs_unregister_hctxs(q);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx);
}
......@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
if (!q->mq_sysfs_init_done)
return ret;
blk_mq_debugfs_register_hctxs(q);
queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx);
if (ret)
......
......@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth;
}
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
struct sbitmap_queue *bt)
{
if (!hctx_may_queue(hctx, bt))
if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
!hctx_may_queue(data->hctx, bt))
return -1;
return __sbitmap_queue_get(bt);
}
static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct sbitmap_queue *bt;
struct sbq_wait_state *ws;
DEFINE_WAIT(wait);
unsigned int tag_offset;
bool drop_ctx;
int tag;
tag = __bt_get(hctx, bt);
if (data->flags & BLK_MQ_REQ_RESERVED) {
if (unlikely(!tags->nr_reserved_tags)) {
WARN_ON_ONCE(1);
return BLK_MQ_TAG_FAIL;
}
bt = &tags->breserved_tags;
tag_offset = 0;
} else {
bt = &tags->bitmap_tags;
tag_offset = tags->nr_reserved_tags;
}
tag = __blk_mq_get_tag(data, bt);
if (tag != -1)
return tag;
goto found_tag;
if (data->flags & BLK_MQ_REQ_NOWAIT)
return -1;
return BLK_MQ_TAG_FAIL;
ws = bt_wait_ptr(bt, hctx);
ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do {
prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt);
tag = __blk_mq_get_tag(data, bt);
if (tag != -1)
break;
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
* some to complete. Note that hctx can be NULL here for
* reserved tag allocation.
* some to complete.
*/
if (hctx)
blk_mq_run_hw_queue(hctx, false);
blk_mq_run_hw_queue(data->hctx, false);
/*
* Retry tag allocation after running the hardware queue,
* as running the queue may also have found completions.
*/
tag = __bt_get(hctx, bt);
tag = __blk_mq_get_tag(data, bt);
if (tag != -1)
break;
blk_mq_put_ctx(data->ctx);
if (data->ctx)
blk_mq_put_ctx(data->ctx);
io_schedule();
data->ctx = blk_mq_get_ctx(data->q);
data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
if (data->flags & BLK_MQ_REQ_RESERVED) {
bt = &data->hctx->tags->breserved_tags;
} else {
hctx = data->hctx;
bt = &hctx->tags->bitmap_tags;
}
tags = blk_mq_tags_from_data(data);
if (data->flags & BLK_MQ_REQ_RESERVED)
bt = &tags->breserved_tags;
else
bt = &tags->bitmap_tags;
finish_wait(&ws->wait, &wait);
ws = bt_wait_ptr(bt, hctx);
ws = bt_wait_ptr(bt, data->hctx);
} while (1);
finish_wait(&ws->wait, &wait);
return tag;
}
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
int tag;
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
data->hctx->tags);
if (tag >= 0)
return tag + data->hctx->tags->nr_reserved_tags;
return BLK_MQ_TAG_FAIL;
}
static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
{
int tag;
if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
WARN_ON_ONCE(1);
return BLK_MQ_TAG_FAIL;
}
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
data->hctx->tags);
if (tag < 0)
return BLK_MQ_TAG_FAIL;
if (drop_ctx && data->ctx)
blk_mq_put_ctx(data->ctx);
return tag;
}
finish_wait(&ws->wait, &wait);
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
if (data->flags & BLK_MQ_REQ_RESERVED)
return __blk_mq_get_reserved_tag(data);
return __blk_mq_get_tag(data);
found_tag:
return tag + tag_offset;
}
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
unsigned int tag)
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
struct blk_mq_ctx *ctx, unsigned int tag)
{
struct blk_mq_tags *tags = hctx->tags;
if (tag >= tags->nr_reserved_tags) {
const int real_tag = tag - tags->nr_reserved_tags;
......@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
struct blk_mq_tags *tags = set->tags[i];
for (j = 0; j < tags->nr_tags; j++) {
if (!tags->rqs[j])
if (!tags->static_rqs[j])
continue;
ret = set->ops->reinit_request(set->driver_data,
tags->rqs[j]);
tags->static_rqs[j]);
if (ret)
goto out;
}
......@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
}
static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
{
return bt->sb.depth - sbitmap_weight(&bt->sb);
}
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
bool round_robin, int node)
{
......@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
kfree(tags);
}
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tagsptr, unsigned int tdepth,
bool can_grow)
{
tdepth -= tags->nr_reserved_tags;
if (tdepth > tags->nr_tags)
struct blk_mq_tags *tags = *tagsptr;
if (tdepth <= tags->nr_reserved_tags)
return -EINVAL;
tdepth -= tags->nr_reserved_tags;
/*
* Don't need (or can't) update reserved tags here, they remain
* static and should never need resizing.
* If we are allowed to grow beyond the original size, allocate
* a new set of tags before freeing the old one.
*/
sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
if (tdepth > tags->nr_tags) {
struct blk_mq_tag_set *set = hctx->queue->tag_set;
struct blk_mq_tags *new;
bool ret;
if (!can_grow)
return -EINVAL;
/*
* We need some sort of upper limit, set it high enough that
* no valid use cases should require more.
*/
if (tdepth > 16 * BLKDEV_MAX_RQ)
return -EINVAL;
new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
if (!new)
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
if (ret) {
blk_mq_free_rq_map(new);
return -ENOMEM;
}
blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
blk_mq_free_rq_map(*tagsptr);
*tagsptr = new;
} else {
/*
* Don't need (or can't) update reserved tags here, they
* remain static and should never need resizing.
*/
sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
}
blk_mq_tag_wakeup_all(tags, false);
return 0;
}
......@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq)
(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
/*
 * Format a summary of @tags into @page: total and reserved tag counts,
 * bits per bitmap word, the number of unused normal and reserved tags,
 * and the active queue count.  Returns the number of characters
 * written, or 0 when @tags is NULL.
 */
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
	unsigned int free, res;
	ssize_t len = 0;

	if (!tags)
		return 0;

	len += sprintf(page + len, "nr_tags=%u, reserved_tags=%u, "
		       "bits_per_word=%u\n",
		       tags->nr_tags, tags->nr_reserved_tags,
		       1U << tags->bitmap_tags.sb.shift);

	free = bt_unused_tags(&tags->bitmap_tags);
	res = bt_unused_tags(&tags->breserved_tags);

	len += sprintf(page + len, "nr_free=%u, nr_reserved=%u\n", free, res);
	len += sprintf(page + len, "active_queues=%u\n",
		       atomic_read(&tags->active_queues));

	return len;
}
......@@ -16,6 +16,7 @@ struct blk_mq_tags {
struct sbitmap_queue breserved_tags;
struct request **rqs;
struct request **static_rqs;
struct list_head page_list;
};
......@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
unsigned int tag);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
struct blk_mq_ctx *ctx, unsigned int tag);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags,
unsigned int depth, bool can_grow);
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv);
......
This diff is collapsed.
......@@ -32,7 +32,31 @@ void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
bool wait);
/*
* Internal helpers for allocating/freeing the request map
*/
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
unsigned int nr_tags,
unsigned int reserved_tags);
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx, unsigned int depth);
/*
* Internal helpers for request insertion into sw queues
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
/*
* CPU hotplug helpers
*/
......@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
/*
 * debugfs helpers
 *
 * With CONFIG_BLK_DEBUG_FS the functions are declared here and defined
 * elsewhere.  Without it they collapse to empty inline stubs; the stubs
 * that return int return 0, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_BLK_DEBUG_FS
void blk_mq_debugfs_init(void);
int blk_mq_debugfs_register(struct request_queue *q, const char *name);
void blk_mq_debugfs_unregister(struct request_queue *q);
int blk_mq_debugfs_register_hctxs(struct request_queue *q);
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
#else
/* Stubs used when block-layer debugfs support is compiled out */
static inline void blk_mq_debugfs_init(void)
{
}
static inline int blk_mq_debugfs_register(struct request_queue *q,
const char *name)
{
return 0;
}
static inline void blk_mq_debugfs_unregister(struct request_queue *q)
{
}
static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
return 0;
}
static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
{
}
#endif
extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
void blk_mq_release(struct request_queue *q);
......@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
data->hctx = hctx;
}
/*
 * Select the tag map an allocation should use: the hctx's scheduler
 * tags when BLK_MQ_REQ_INTERNAL is set in data->flags, otherwise its
 * regular tags.
 */
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
	return (data->flags & BLK_MQ_REQ_INTERNAL) ? data->hctx->sched_tags
						   : data->hctx->tags;
}
/*
* Internal helpers for request allocation/init/free
*/
void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
struct request *rq, unsigned int op);
void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct request *rq);
void blk_mq_finish_request(struct request *rq);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
unsigned int op);
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
......
......@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
list_del_init(&rq->queuelist);
rq->rq_flags &= ~RQF_QUEUED;
rq->tag = -1;
rq->internal_tag = -1;
if (unlikely(bqt->tag_index[tag] == NULL))
printk(KERN_ERR "%s: tag %d is missing\n",
......
......@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg)
unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
struct bio *bio;
if ((bio = throtl_peek_queued(&sq->queued[READ])))
bio = throtl_peek_queued(&sq->queued[READ]);
if (bio)
tg_may_dispatch(tg, bio, &read_wait);
if ((bio = throtl_peek_queued(&sq->queued[WRITE])))
bio = throtl_peek_queued(&sq->queued[WRITE]);
if (bio)
tg_may_dispatch(tg, bio, &write_wait);
min_wait = min(read_wait, write_wait);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment