Commit f924ba70 authored by Jens Axboe

Merge branch 'for-4.11/block' into for-4.11/rq-refactor

Signed-off-by: Jens Axboe <axboe@fb.com>
parents 7a308bb3 400f73b2
...@@ -8604,10 +8604,10 @@ S: Maintained ...@@ -8604,10 +8604,10 @@ S: Maintained
F: drivers/net/ethernet/netronome/ F: drivers/net/ethernet/netronome/
NETWORK BLOCK DEVICE (NBD) NETWORK BLOCK DEVICE (NBD)
M: Markus Pargmann <mpa@pengutronix.de> M: Josef Bacik <jbacik@fb.com>
S: Maintained S: Maintained
L: linux-block@vger.kernel.org
L: nbd-general@lists.sourceforge.net L: nbd-general@lists.sourceforge.net
T: git git://git.pengutronix.de/git/mpa/linux-nbd.git
F: Documentation/blockdev/nbd.txt F: Documentation/blockdev/nbd.txt
F: drivers/block/nbd.c F: drivers/block/nbd.c
F: include/uapi/linux/nbd.h F: include/uapi/linux/nbd.h
......
...@@ -147,6 +147,18 @@ config BLK_WBT_MQ ...@@ -147,6 +147,18 @@ config BLK_WBT_MQ
Multiqueue currently doesn't have support for IO scheduling, Multiqueue currently doesn't have support for IO scheduling,
enabling this option is recommended. enabling this option is recommended.
config BLK_DEBUG_FS
bool "Block layer debugging information in debugfs"
default y
depends on DEBUG_FS
---help---
Include block layer debugging information in debugfs. This information
is mostly useful for kernel developers, but it doesn't incur any cost
at runtime.
Unless you are building a kernel for a tiny system, you should
say Y here.
menu "Partition Types" menu "Partition Types"
source "block/partitions/Kconfig" source "block/partitions/Kconfig"
......
...@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED ...@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
default "cfq" if DEFAULT_CFQ default "cfq" if DEFAULT_CFQ
default "noop" if DEFAULT_NOOP default "noop" if DEFAULT_NOOP
config MQ_IOSCHED_DEADLINE
tristate "MQ deadline I/O scheduler"
default y
---help---
MQ version of the deadline IO scheduler.
config MQ_IOSCHED_NONE
bool
default y
choice
prompt "Default single-queue blk-mq I/O scheduler"
default DEFAULT_SQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with a single queue.
config DEFAULT_SQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_SQ_NONE
bool "None"
endchoice
config DEFAULT_SQ_IOSCHED
string
default "mq-deadline" if DEFAULT_SQ_DEADLINE
default "none" if DEFAULT_SQ_NONE
choice
prompt "Default multi-queue blk-mq I/O scheduler"
default DEFAULT_MQ_NONE
help
Select the I/O scheduler which will be used by default for blk-mq
managed block devices with multiple queues.
config DEFAULT_MQ_DEADLINE
bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
config DEFAULT_MQ_NONE
bool "None"
endchoice
config DEFAULT_MQ_IOSCHED
string
default "mq-deadline" if DEFAULT_MQ_DEADLINE
default "none" if DEFAULT_MQ_NONE
endmenu endmenu
endif endif
...@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ ...@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/ badblocks.o partitions/
...@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o ...@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
...@@ -25,3 +26,4 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o ...@@ -25,3 +26,4 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
...@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(q, pol))
return 0; return 0;
blk_queue_bypass_start(q); if (q->mq_ops)
blk_mq_freeze_queue(q);
else
blk_queue_bypass_start(q);
pd_prealloc: pd_prealloc:
if (!pd_prealloc) { if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
...@@ -1261,7 +1264,10 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1261,7 +1264,10 @@ int blkcg_activate_policy(struct request_queue *q,
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
out_bypass_end: out_bypass_end:
blk_queue_bypass_end(q); if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
if (pd_prealloc) if (pd_prealloc)
pol->pd_free_fn(pd_prealloc); pol->pd_free_fn(pd_prealloc);
return ret; return ret;
...@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q, ...@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol)) if (!blkcg_policy_enabled(q, pol))
return; return;
blk_queue_bypass_start(q); if (q->mq_ops)
blk_mq_freeze_queue(q);
else
blk_queue_bypass_start(q);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols); __clear_bit(pol->plid, q->blkcg_pols);
...@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q, ...@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
} }
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
blk_queue_bypass_end(q);
if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
} }
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "blk.h" #include "blk.h"
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h" #include "blk-wbt.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
...@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) ...@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd; rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB; rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1; rq->tag = -1;
rq->internal_tag = -1;
rq->start_time = jiffies; rq->start_time = jiffies;
set_start_time_ns(rq); set_start_time_ns(rq);
rq->part = NULL; rq->part = NULL;
...@@ -1033,28 +1035,12 @@ static bool blk_rq_should_init_elevator(struct bio *bio) ...@@ -1033,28 +1035,12 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
* Flush requests do not use the elevator so skip initialization. * Flush requests do not use the elevator so skip initialization.
* This allows a request to share the flush and elevator data. * This allows a request to share the flush and elevator data.
*/ */
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) if (op_is_flush(bio->bi_opf))
return false; return false;
return true; return true;
} }
/**
* rq_ioc - determine io_context for request allocation
* @bio: request being allocated is for this bio (can be %NULL)
*
* Determine io_context to use for request allocation for @bio. May return
* %NULL if %current->io_context doesn't exist.
*/
static struct io_context *rq_ioc(struct bio *bio)
{
#ifdef CONFIG_BLK_CGROUP
if (bio && bio->bi_ioc)
return bio->bi_ioc;
#endif
return current->io_context;
}
/** /**
* __get_request - get a free request * __get_request - get a free request
* @rl: request list to allocate from * @rl: request list to allocate from
...@@ -1655,7 +1641,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) ...@@ -1655,7 +1641,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE; return BLK_QC_T_NONE;
} }
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { if (op_is_flush(bio->bi_opf)) {
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH; where = ELEVATOR_INSERT_FLUSH;
goto get_rq; goto get_rq;
...@@ -1894,7 +1880,7 @@ generic_make_request_checks(struct bio *bio) ...@@ -1894,7 +1880,7 @@ generic_make_request_checks(struct bio *bio)
* drivers without flush support don't have to worry * drivers without flush support don't have to worry
* about them. * about them.
*/ */
if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && if (op_is_flush(bio->bi_opf) &&
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!nr_sectors) { if (!nr_sectors) {
...@@ -2143,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) ...@@ -2143,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
if (q->mq_ops) { if (q->mq_ops) {
if (blk_queue_io_stat(q)) if (blk_queue_io_stat(q))
blk_account_io_start(rq, true); blk_account_io_start(rq, true);
blk_mq_insert_request(rq, false, true, false); blk_mq_sched_insert_request(rq, false, true, false, false);
return 0; return 0;
} }
...@@ -2159,7 +2145,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) ...@@ -2159,7 +2145,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/ */
BUG_ON(blk_queued_rq(rq)); BUG_ON(blk_queued_rq(rq));
if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) if (op_is_flush(rq->cmd_flags))
where = ELEVATOR_INSERT_FLUSH; where = ELEVATOR_INSERT_FLUSH;
add_acct_request(q, rq, where); add_acct_request(q, rq, where);
...@@ -3270,7 +3256,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) ...@@ -3270,7 +3256,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/* /*
* rq is already accounted, so use raw insert * rq is already accounted, so use raw insert
*/ */
if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) if (op_is_flush(rq->cmd_flags))
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else else
__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
#include "blk.h" #include "blk.h"
#include "blk-mq-sched.h"
/* /*
* for max sense size * for max sense size
...@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, ...@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* be reused after dying flag is set * be reused after dying flag is set
*/ */
if (q->mq_ops) { if (q->mq_ops) {
blk_mq_insert_request(rq, at_head, true, false); blk_mq_sched_insert_request(rq, at_head, true, false, false);
return; return;
} }
......
...@@ -74,6 +74,7 @@ ...@@ -74,6 +74,7 @@
#include "blk.h" #include "blk.h"
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-tag.h" #include "blk-mq-tag.h"
#include "blk-mq-sched.h"
/* FLUSH/FUA sequences */ /* FLUSH/FUA sequences */
enum { enum {
...@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error) ...@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
* the comment in flush_end_io(). * the comment in flush_end_io().
*/ */
spin_lock_irqsave(&fq->mq_flush_lock, flags); spin_lock_irqsave(&fq->mq_flush_lock, flags);
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
blk_mq_run_hw_queue(hctx, true);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
blk_mq_run_hw_queue(hctx, true);
} }
/** /**
...@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq) ...@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
*/ */
if ((policy & REQ_FSEQ_DATA) && if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
if (q->mq_ops) { if (q->mq_ops)
blk_mq_insert_request(rq, false, true, false); blk_mq_sched_insert_request(rq, false, true, false, false);
} else else
list_add_tail(&rq->queuelist, &q->queue_head); list_add_tail(&rq->queuelist, &q->queue_head);
return; return;
} }
......
...@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq) ...@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED) if (icq->flags & ICQ_EXITED)
return; return;
if (et->ops.elevator_exit_icq_fn) if (et->uses_mq && et->ops.mq.exit_icq)
et->ops.elevator_exit_icq_fn(icq); et->ops.mq.exit_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
et->ops.sq.elevator_exit_icq_fn(icq);
icq->flags |= ICQ_EXITED; icq->flags |= ICQ_EXITED;
} }
...@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, ...@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list); hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list); list_add(&icq->q_node, &q->icq_list);
if (et->ops.elevator_init_icq_fn) if (et->uses_mq && et->ops.mq.init_icq)
et->ops.elevator_init_icq_fn(icq); et->ops.mq.init_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
et->ops.sq.elevator_init_icq_fn(icq);
} else { } else {
kmem_cache_free(et->icq_cache, icq); kmem_cache_free(et->icq_cache, icq);
icq = ioc_lookup_icq(ioc, q); icq = ioc_lookup_icq(ioc, q);
......
...@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, ...@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
if (e->type->ops.elevator_allow_rq_merge_fn) if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0; return 0;
return attempt_merge(q, rq, next); return attempt_merge(q, rq, next);
......
This diff is collapsed.
This diff is collapsed.
#ifndef BLK_MQ_SCHED_H
#define BLK_MQ_SCHED_H
#include "blk-mq.h"
#include "blk-mq-tag.h"
/*
 * Out-of-line blk-mq scheduler entry points. Only the declarations are
 * visible in this header; the definitions presumably live in
 * blk-mq-sched.c -- TODO confirm.
 */

/*
 * Per-hardware-queue data setup/teardown. @init and @exit are callbacks
 * taking a struct blk_mq_hw_ctx; @size presumably is the size of the
 * per-hctx data -- TODO confirm against the definition.
 */
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
int (*init)(struct blk_mq_hw_ctx *),
void (*exit)(struct blk_mq_hw_ctx *));
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
/* Request get/put; exact semantics are defined with the implementation. */
struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
void blk_mq_sched_put_request(struct request *rq);
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
/*
 * Merge helpers. __blk_mq_sched_bio_merge() is the slow path behind the
 * inline blk_mq_sched_bio_merge() wrapper defined later in this header.
 */
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
/*
 * Request insertion. Parameter meanings (@at_head, @run_queue, @async,
 * @can_block, @run_queue_async) are taken from their names only --
 * NOTE(review): verify against the definitions.
 */
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async, bool can_block);
void blk_mq_sched_insert_requests(struct request_queue *q,
struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async);
/* Dispatch path. @get_rq is a callback returning a request for an hctx. */
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
struct list_head *rq_list,
struct request *(*get_rq)(struct blk_mq_hw_ctx *));
/* Per-queue scheduler lifetime; the int-returning ones report a status. */
int blk_mq_sched_setup(struct request_queue *q);
void blk_mq_sched_teardown(struct request_queue *q);
int blk_mq_sched_init(struct request_queue *q);
/*
 * Fast-path wrapper around __blk_mq_sched_bio_merge().
 *
 * Returns false without calling the slow path when the queue has no
 * elevator attached, when blk_queue_nomerges(q) is true, or when
 * bio_mergeable(bio) is false. Otherwise returns whatever
 * __blk_mq_sched_bio_merge() returns.
 */
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
	/* Checks are kept in the same order as a short-circuit || chain. */
	if (!q->elevator)
		return false;
	if (blk_queue_nomerges(q))
		return false;
	if (!bio_mergeable(bio))
		return false;

	return __blk_mq_sched_bio_merge(q, bio);
}
/*
 * Call the elevator's mq.get_rq_priv hook for @rq, if there is one.
 *
 * Returns the hook's return value, or 0 when the queue has no elevator
 * or the elevator does not provide the hook.
 */
static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
					   struct request *rq)
{
	struct elevator_queue *sched = q->elevator;

	if (!sched || !sched->type->ops.mq.get_rq_priv)
		return 0;

	return sched->type->ops.mq.get_rq_priv(q, rq);
}
/*
 * Call the elevator's mq.put_rq_priv hook for @rq, if there is one.
 * Does nothing when the queue has no elevator or the hook is unset.
 */
static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
					    struct request *rq)
{
	struct elevator_queue *sched = q->elevator;

	if (!sched)
		return;

	if (sched->type->ops.mq.put_rq_priv)
		sched->type->ops.mq.put_rq_priv(q, rq);
}
/*
 * Ask the elevator's mq.allow_merge hook about merging @bio with @rq.
 *
 * Returns the hook's answer when the queue has an elevator that
 * provides the hook; returns true otherwise.
 */
static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
			 struct bio *bio)
{
	struct elevator_queue *sched = q->elevator;

	if (!sched || !sched->type->ops.mq.allow_merge)
		return true;

	return sched->type->ops.mq.allow_merge(q, rq, bio);
}
/*
 * Completion-side hook for @rq on @hctx.
 *
 * Calls the elevator's mq.completed_request hook when the queue has an
 * elevator that provides it. Then, regardless of whether an elevator is
 * attached, BUGs if rq->internal_tag is -1 and hands rq->internal_tag
 * to blk_mq_put_tag() against hctx->sched_tags.
 */
static inline void
blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct elevator_queue *sched = hctx->queue->elevator;

	if (sched) {
		if (sched->type->ops.mq.completed_request)
			sched->type->ops.mq.completed_request(hctx, rq);
	}

	BUG_ON(rq->internal_tag == -1);

	blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
}
/*
 * Call the elevator's mq.started_request hook for @rq, if the request's
 * queue has an elevator that provides it. Otherwise does nothing.
 */
static inline void blk_mq_sched_started_request(struct request *rq)
{
	struct elevator_queue *sched = rq->q->elevator;

	if (!sched || !sched->type->ops.mq.started_request)
		return;

	sched->type->ops.mq.started_request(rq);
}
/*
 * Call the elevator's mq.requeue_request hook for @rq, if the request's
 * queue has an elevator that provides it. Otherwise does nothing.
 */
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
	struct elevator_queue *sched = rq->q->elevator;

	if (!sched || !sched->type->ops.mq.requeue_request)
		return;

	sched->type->ops.mq.requeue_request(rq);
}
/*
 * Query the elevator's mq.has_work hook for @hctx.
 *
 * Returns the hook's answer when the queue has an elevator that
 * provides the hook; returns false otherwise.
 */
static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
{
	struct elevator_queue *sched = hctx->queue->elevator;

	if (!sched || !sched->type->ops.mq.has_work)
		return false;

	return sched->type->ops.mq.has_work(hctx);
}
/*
 * Flag @hctx as needing a restart.
 *
 * Sets BLK_MQ_S_SCHED_RESTART in hctx->state unless it is already set.
 * When the bit was newly set and the hctx has BLK_MQ_F_TAG_SHARED,
 * also sets QUEUE_FLAG_RESTART in the queue flags (again only if it is
 * not already set). Each bit is tested before being set, so nothing is
 * written when the flag is already present.
 */
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q;

	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return;

	q = hctx->queue;
	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
}
/*
 * Report whether BLK_MQ_S_SCHED_RESTART is currently set in
 * hctx->state (the bit that blk_mq_sched_mark_restart() sets).
 */
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state) != 0;
}
#endif
...@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj, ...@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
return res; return res;
} }
static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
{
return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
ctx->rq_dispatched[0]);
}
static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu\n", ctx->rq_merged);
}
static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
{
return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
ctx->rq_completed[0]);
}
static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
{
struct request *rq;
int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
list_for_each_entry(rq, list, queuelist) {
const int rq_len = 2 * sizeof(rq) + 2;
/* if the output will be truncated */
if (PAGE_SIZE - 1 < len + rq_len) {
/* backspacing if it can't hold '\t...\n' */
if (PAGE_SIZE - 1 < len + 5)
len -= rq_len;
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t...\n");
break;
}
len += snprintf(page + len, PAGE_SIZE - 1 - len,
"\t%p\n", rq);
}
return len;
}
static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
{
ssize_t ret;
spin_lock(&ctx->lock);
ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
spin_unlock(&ctx->lock);
return ret;
}
static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n",
hctx->poll_considered, hctx->poll_invoked,
hctx->poll_success);
}
static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx,
const char *page, size_t size)
{
hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
return size;
}
static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
return sprintf(page, "%lu\n", hctx->queued);
}
static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%lu\n", hctx->run);
}
static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
char *page)
{
char *start_page = page;
int i;
page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
unsigned int d = 1U << (i - 1);
page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]);
}
page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1),
hctx->dispatched[i]);
return page - start_page;
}
static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
char *page) char *page)
{ {
ssize_t ret; return sprintf(page, "%u\n", hctx->tags->nr_tags);
spin_lock(&hctx->lock);
ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
spin_unlock(&hctx->lock);
return ret;
} }
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
char *page)
{ {
return blk_mq_tag_sysfs_show(hctx->tags, page); return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
}
static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
} }
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
...@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) ...@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret; return ret;
} }
static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_ctx *ctx;
unsigned int i;
hctx_for_each_ctx(hctx, ctx, i) {
blk_stat_init(&ctx->stat[BLK_STAT_READ]);
blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
}
}
static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
const char *page, size_t count)
{
blk_mq_stat_clear(hctx);
return count;
}
static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
{
return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
pre, (long long) stat->nr_samples,
(long long) stat->mean, (long long) stat->min,
(long long) stat->max);
}
static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
{
struct blk_rq_stat stat[2];
ssize_t ret;
blk_stat_init(&stat[BLK_STAT_READ]);
blk_stat_init(&stat[BLK_STAT_WRITE]);
blk_hctx_stat_get(hctx, stat);
ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
return ret;
}
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
.attr = {.name = "merged", .mode = S_IRUGO },
.show = blk_mq_sysfs_merged_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
.attr = {.name = "completed", .mode = S_IRUGO },
.show = blk_mq_sysfs_completed_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
.attr = {.name = "rq_list", .mode = S_IRUGO },
.show = blk_mq_sysfs_rq_list_show,
};
static struct attribute *default_ctx_attrs[] = { static struct attribute *default_ctx_attrs[] = {
&blk_mq_sysfs_dispatched.attr,
&blk_mq_sysfs_merged.attr,
&blk_mq_sysfs_completed.attr,
&blk_mq_sysfs_rq_list.attr,
NULL, NULL,
}; };
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
.attr = {.name = "queued", .mode = S_IRUGO }, .attr = {.name = "nr_tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_queued_show, .show = blk_mq_hw_sysfs_nr_tags_show,
}; };
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
.attr = {.name = "run", .mode = S_IRUGO }, .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_run_show, .show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_dispatched_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
.attr = {.name = "active", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_active_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show,
}; };
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
.attr = {.name = "cpu_list", .mode = S_IRUGO }, .attr = {.name = "cpu_list", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_cpus_show, .show = blk_mq_hw_sysfs_cpus_show,
}; };
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
.store = blk_mq_hw_sysfs_poll_store,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
.show = blk_mq_hw_sysfs_stat_show,
.store = blk_mq_hw_sysfs_stat_store,
};
static struct attribute *default_hw_ctx_attrs[] = { static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr, &blk_mq_hw_sysfs_nr_tags.attr,
&blk_mq_hw_sysfs_run.attr, &blk_mq_hw_sysfs_nr_reserved_tags.attr,
&blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_tags.attr,
&blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
&blk_mq_hw_sysfs_stat.attr,
NULL, NULL,
}; };
...@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) ...@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
kobject_put(&hctx->kobj); kobject_put(&hctx->kobj);
} }
blk_mq_debugfs_unregister(q);
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj); kobject_del(&q->mq_kobj);
kobject_put(&q->mq_kobj); kobject_put(&q->mq_kobj);
...@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) ...@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
kobject_uevent(&q->mq_kobj, KOBJ_ADD); kobject_uevent(&q->mq_kobj, KOBJ_ADD);
blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx); ret = blk_mq_register_hctx(hctx);
if (ret) if (ret)
...@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q) ...@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
if (!q->mq_sysfs_init_done) if (!q->mq_sysfs_init_done)
return; return;
blk_mq_debugfs_unregister_hctxs(q);
queue_for_each_hw_ctx(q, hctx, i) queue_for_each_hw_ctx(q, hctx, i)
blk_mq_unregister_hctx(hctx); blk_mq_unregister_hctx(hctx);
} }
...@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q) ...@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
if (!q->mq_sysfs_init_done) if (!q->mq_sysfs_init_done)
return ret; return ret;
blk_mq_debugfs_register_hctxs(q);
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
ret = blk_mq_register_hctx(hctx); ret = blk_mq_register_hctx(hctx);
if (ret) if (ret)
......
...@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, ...@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth; return atomic_read(&hctx->nr_active) < depth;
} }
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
struct sbitmap_queue *bt)
{ {
if (!hctx_may_queue(hctx, bt)) if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
!hctx_may_queue(data->hctx, bt))
return -1; return -1;
return __sbitmap_queue_get(bt); return __sbitmap_queue_get(bt);
} }
static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
{ {
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct sbitmap_queue *bt;
struct sbq_wait_state *ws; struct sbq_wait_state *ws;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
unsigned int tag_offset;
bool drop_ctx;
int tag; int tag;
tag = __bt_get(hctx, bt); if (data->flags & BLK_MQ_REQ_RESERVED) {
if (unlikely(!tags->nr_reserved_tags)) {
WARN_ON_ONCE(1);
return BLK_MQ_TAG_FAIL;
}
bt = &tags->breserved_tags;
tag_offset = 0;
} else {
bt = &tags->bitmap_tags;
tag_offset = tags->nr_reserved_tags;
}
tag = __blk_mq_get_tag(data, bt);
if (tag != -1) if (tag != -1)
return tag; goto found_tag;
if (data->flags & BLK_MQ_REQ_NOWAIT) if (data->flags & BLK_MQ_REQ_NOWAIT)
return -1; return BLK_MQ_TAG_FAIL;
ws = bt_wait_ptr(bt, hctx); ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do { do {
prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt); tag = __blk_mq_get_tag(data, bt);
if (tag != -1) if (tag != -1)
break; break;
/* /*
* We're out of tags on this hardware queue, kick any * We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for * pending IO submits before going to sleep waiting for
* some to complete. Note that hctx can be NULL here for * some to complete.
* reserved tag allocation.
*/ */
if (hctx) blk_mq_run_hw_queue(data->hctx, false);
blk_mq_run_hw_queue(hctx, false);
/* /*
* Retry tag allocation after running the hardware queue, * Retry tag allocation after running the hardware queue,
* as running the queue may also have found completions. * as running the queue may also have found completions.
*/ */
tag = __bt_get(hctx, bt); tag = __blk_mq_get_tag(data, bt);
if (tag != -1) if (tag != -1)
break; break;
blk_mq_put_ctx(data->ctx); if (data->ctx)
blk_mq_put_ctx(data->ctx);
io_schedule(); io_schedule();
data->ctx = blk_mq_get_ctx(data->q); data->ctx = blk_mq_get_ctx(data->q);
data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
if (data->flags & BLK_MQ_REQ_RESERVED) { tags = blk_mq_tags_from_data(data);
bt = &data->hctx->tags->breserved_tags; if (data->flags & BLK_MQ_REQ_RESERVED)
} else { bt = &tags->breserved_tags;
hctx = data->hctx; else
bt = &hctx->tags->bitmap_tags; bt = &tags->bitmap_tags;
}
finish_wait(&ws->wait, &wait); finish_wait(&ws->wait, &wait);
ws = bt_wait_ptr(bt, hctx); ws = bt_wait_ptr(bt, data->hctx);
} while (1); } while (1);
finish_wait(&ws->wait, &wait); if (drop_ctx && data->ctx)
return tag; blk_mq_put_ctx(data->ctx);
}
/*
 * Grab a tag from the regular (non-reserved) bitmap of the hardware queue
 * referenced by @data.
 *
 * Returns the allocated bitmap index shifted up by nr_reserved_tags, or
 * BLK_MQ_TAG_FAIL when bt_get() reports a negative value.
 */
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	int idx = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
			 data->hctx->tags);

	if (idx < 0)
		return BLK_MQ_TAG_FAIL;

	/*
	 * data->hctx is deliberately dereferenced again here rather than
	 * cached before the call: the tag-wait loop reassigns data->hctx
	 * after sleeping.
	 */
	return idx + data->hctx->tags->nr_reserved_tags;
}
static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
{
int tag;
if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
WARN_ON_ONCE(1);
return BLK_MQ_TAG_FAIL;
}
tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
data->hctx->tags);
if (tag < 0)
return BLK_MQ_TAG_FAIL;
return tag; finish_wait(&ws->wait, &wait);
}
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) found_tag:
{ return tag + tag_offset;
if (data->flags & BLK_MQ_REQ_RESERVED)
return __blk_mq_get_reserved_tag(data);
return __blk_mq_get_tag(data);
} }
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
unsigned int tag) struct blk_mq_ctx *ctx, unsigned int tag)
{ {
struct blk_mq_tags *tags = hctx->tags;
if (tag >= tags->nr_reserved_tags) { if (tag >= tags->nr_reserved_tags) {
const int real_tag = tag - tags->nr_reserved_tags; const int real_tag = tag - tags->nr_reserved_tags;
...@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) ...@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
struct blk_mq_tags *tags = set->tags[i]; struct blk_mq_tags *tags = set->tags[i];
for (j = 0; j < tags->nr_tags; j++) { for (j = 0; j < tags->nr_tags; j++) {
if (!tags->rqs[j]) if (!tags->static_rqs[j])
continue; continue;
ret = set->ops->reinit_request(set->driver_data, ret = set->ops->reinit_request(set->driver_data,
tags->rqs[j]); tags->static_rqs[j]);
if (ret) if (ret)
goto out; goto out;
} }
...@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, ...@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
} }
/*
 * Number of entries in @bt that are not accounted for by sbitmap_weight():
 * the bitmap depth minus the weight.
 */
static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
{
	const unsigned int weight = sbitmap_weight(&bt->sb);

	return bt->sb.depth - weight;
}
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
bool round_robin, int node) bool round_robin, int node)
{ {
...@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) ...@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
kfree(tags); kfree(tags);
} }
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tagsptr, unsigned int tdepth,
bool can_grow)
{ {
tdepth -= tags->nr_reserved_tags; struct blk_mq_tags *tags = *tagsptr;
if (tdepth > tags->nr_tags)
if (tdepth <= tags->nr_reserved_tags)
return -EINVAL; return -EINVAL;
tdepth -= tags->nr_reserved_tags;
/* /*
* Don't need (or can't) update reserved tags here, they remain * If we are allowed to grow beyond the original size, allocate
* static and should never need resizing. * a new set of tags before freeing the old one.
*/ */
sbitmap_queue_resize(&tags->bitmap_tags, tdepth); if (tdepth > tags->nr_tags) {
struct blk_mq_tag_set *set = hctx->queue->tag_set;
struct blk_mq_tags *new;
bool ret;
if (!can_grow)
return -EINVAL;
/*
* We need some sort of upper limit, set it high enough that
* no valid use cases should require more.
*/
if (tdepth > 16 * BLKDEV_MAX_RQ)
return -EINVAL;
new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
if (!new)
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
if (ret) {
blk_mq_free_rq_map(new);
return -ENOMEM;
}
blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
blk_mq_free_rq_map(*tagsptr);
*tagsptr = new;
} else {
/*
* Don't need (or can't) update reserved tags here, they
* remain static and should never need resizing.
*/
sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
}
blk_mq_tag_wakeup_all(tags, false);
return 0; return 0;
} }
...@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq) ...@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq)
(rq->tag & BLK_MQ_UNIQUE_TAG_MASK); (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
} }
EXPORT_SYMBOL(blk_mq_unique_tag); EXPORT_SYMBOL(blk_mq_unique_tag);
/*
 * Format a textual summary of @tags into @page.
 *
 * Three lines are emitted: the tag counts and bits per word, the number of
 * unused regular and reserved tags, and the active queue count.
 *
 * Returns the number of bytes written, or 0 when @tags is NULL (in which
 * case @page is left untouched).
 */
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
	char *cur = page;
	unsigned int nr_free, nr_res;

	if (!tags)
		return 0;

	cur += sprintf(cur, "nr_tags=%u, reserved_tags=%u, "
			    "bits_per_word=%u\n",
		       tags->nr_tags, tags->nr_reserved_tags,
		       1U << tags->bitmap_tags.sb.shift);

	/* Sample the regular pool first, then the reserved pool. */
	nr_free = bt_unused_tags(&tags->bitmap_tags);
	nr_res = bt_unused_tags(&tags->breserved_tags);

	cur += sprintf(cur, "nr_free=%u, nr_reserved=%u\n", nr_free, nr_res);
	cur += sprintf(cur, "active_queues=%u\n",
		       atomic_read(&tags->active_queues));

	return cur - page;
}
...@@ -16,6 +16,7 @@ struct blk_mq_tags { ...@@ -16,6 +16,7 @@ struct blk_mq_tags {
struct sbitmap_queue breserved_tags; struct sbitmap_queue breserved_tags;
struct request **rqs; struct request **rqs;
struct request **static_rqs;
struct list_head page_list; struct list_head page_list;
}; };
...@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r ...@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
unsigned int tag); struct blk_mq_ctx *ctx, unsigned int tag);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); struct blk_mq_tags **tags,
unsigned int depth, bool can_grow);
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv); void *priv);
......
This diff is collapsed.
...@@ -32,7 +32,31 @@ void blk_mq_free_queue(struct request_queue *q); ...@@ -32,7 +32,31 @@ void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q); void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
bool wait);
/*
* Internal helpers for allocating/freeing the request map
*/
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int hctx_idx,
unsigned int nr_tags,
unsigned int reserved_tags);
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
unsigned int hctx_idx, unsigned int depth);
/*
* Internal helpers for request insertion into sw queues
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
/* /*
* CPU hotplug helpers * CPU hotplug helpers
*/ */
...@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q); ...@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
/*
 * debugfs helpers
 *
 * With CONFIG_BLK_DEBUG_FS enabled these are only declared here. Without
 * it, every helper becomes an empty inline stub; the register variants
 * report success (0) so callers need no conditional code.
 */
#ifdef CONFIG_BLK_DEBUG_FS
void blk_mq_debugfs_init(void);
int blk_mq_debugfs_register(struct request_queue *q, const char *name);
void blk_mq_debugfs_unregister(struct request_queue *q);
int blk_mq_debugfs_register_hctxs(struct request_queue *q);
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
#else /* !CONFIG_BLK_DEBUG_FS */
static inline void blk_mq_debugfs_init(void) { }

static inline int blk_mq_debugfs_register(struct request_queue *q,
					  const char *name)
{
	return 0;
}

static inline void blk_mq_debugfs_unregister(struct request_queue *q) { }

static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
	return 0;
}

static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { }
#endif /* CONFIG_BLK_DEBUG_FS */
extern void blk_mq_rq_timed_out(struct request *req, bool reserved); extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
void blk_mq_release(struct request_queue *q); void blk_mq_release(struct request_queue *q);
...@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, ...@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
data->hctx = hctx; data->hctx = hctx;
} }
/*
 * Select the tag map an allocation described by @data draws from:
 * hctx->sched_tags when BLK_MQ_REQ_INTERNAL is set in data->flags,
 * hctx->tags otherwise.
 */
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
	return (data->flags & BLK_MQ_REQ_INTERNAL) ? data->hctx->sched_tags
						   : data->hctx->tags;
}
/*
* Internal helpers for request allocation/init/free
*/
void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
struct request *rq, unsigned int op);
void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct request *rq);
void blk_mq_finish_request(struct request *rq);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
unsigned int op);
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{ {
return test_bit(BLK_MQ_S_STOPPED, &hctx->state); return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
......
...@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) ...@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
list_del_init(&rq->queuelist); list_del_init(&rq->queuelist);
rq->rq_flags &= ~RQF_QUEUED; rq->rq_flags &= ~RQF_QUEUED;
rq->tag = -1; rq->tag = -1;
rq->internal_tag = -1;
if (unlikely(bqt->tag_index[tag] == NULL)) if (unlikely(bqt->tag_index[tag] == NULL))
printk(KERN_ERR "%s: tag %d is missing\n", printk(KERN_ERR "%s: tag %d is missing\n",
......
...@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg) ...@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg)
unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
struct bio *bio; struct bio *bio;
if ((bio = throtl_peek_queued(&sq->queued[READ]))) bio = throtl_peek_queued(&sq->queued[READ]);
if (bio)
tg_may_dispatch(tg, bio, &read_wait); tg_may_dispatch(tg, bio, &read_wait);
if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) bio = throtl_peek_queued(&sq->queued[WRITE]);
if (bio)
tg_may_dispatch(tg, bio, &write_wait); tg_may_dispatch(tg, bio, &write_wait);
min_wait = min(read_wait, write_wait); min_wait = min(read_wait, write_wait);
......
...@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) ...@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
return NULL; return NULL;
} }
if (unlikely(blk_queue_bypass(q)) || if (unlikely(blk_queue_bypass(q)) ||
!q->elevator->type->ops.elevator_dispatch_fn(q, 0)) !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
return NULL; return NULL;
} }
} }
...@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) ...@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
if (e->type->ops.elevator_activate_req_fn) if (e->type->ops.sq.elevator_activate_req_fn)
e->type->ops.elevator_activate_req_fn(q, rq); e->type->ops.sq.elevator_activate_req_fn(q, rq);
} }
static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
if (e->type->ops.elevator_deactivate_req_fn) if (e->type->ops.sq.elevator_deactivate_req_fn)
e->type->ops.elevator_deactivate_req_fn(q, rq); e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
} }
#ifdef CONFIG_FAIL_IO_TIMEOUT #ifdef CONFIG_FAIL_IO_TIMEOUT
...@@ -263,6 +263,22 @@ void ioc_clear_queue(struct request_queue *q); ...@@ -263,6 +263,22 @@ void ioc_clear_queue(struct request_queue *q);
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
/**
 * rq_ioc - pick the io_context to charge a request allocation to
 * @bio: bio the request is being allocated for (may be %NULL)
 *
 * With CONFIG_BLK_CGROUP, a non-%NULL @bio->bi_ioc takes precedence.
 * In every other case %current->io_context is used, which may itself be
 * %NULL if the current task has no io_context.
 */
static inline struct io_context *rq_ioc(struct bio *bio)
{
	struct io_context *ioc = NULL;

#ifdef CONFIG_BLK_CGROUP
	if (bio)
		ioc = bio->bi_ioc;
#endif
	/* Fall back to the submitting task when the bio carries nothing. */
	if (!ioc)
		ioc = current->io_context;

	return ioc;
}
/** /**
* create_io_context - try to create task->io_context * create_io_context - try to create task->io_context
* @gfp_mask: allocation mask * @gfp_mask: allocation mask
......
...@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) ...@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
if (!cfqg) if (!cfqg)
return NULL; return NULL;
for_each_cfqg_st(cfqg, i, j, st) for_each_cfqg_st(cfqg, i, j, st) {
if ((cfqq = cfq_rb_first(st)) != NULL) cfqq = cfq_rb_first(st);
if (cfqq)
return cfqq; return cfqq;
}
return NULL; return NULL;
} }
...@@ -3864,6 +3866,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, ...@@ -3864,6 +3866,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
goto out; goto out;
} }
/* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
cfqq->ioprio_class = IOPRIO_CLASS_NONE;
cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
cfq_init_prio_data(cfqq, cic); cfq_init_prio_data(cfqq, cic);
cfq_link_cfqq_cfqg(cfqq, cfqg); cfq_link_cfqq_cfqg(cfqq, cfqg);
...@@ -4837,7 +4841,7 @@ static struct elv_fs_entry cfq_attrs[] = { ...@@ -4837,7 +4841,7 @@ static struct elv_fs_entry cfq_attrs[] = {
}; };
static struct elevator_type iosched_cfq = { static struct elevator_type iosched_cfq = {
.ops = { .ops.sq = {
.elevator_merge_fn = cfq_merge, .elevator_merge_fn = cfq_merge,
.elevator_merged_fn = cfq_merged_request, .elevator_merged_fn = cfq_merged_request,
.elevator_merge_req_fn = cfq_merged_requests, .elevator_merge_req_fn = cfq_merged_requests,
......
...@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = { ...@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = {
}; };
static struct elevator_type iosched_deadline = { static struct elevator_type iosched_deadline = {
.ops = { .ops.sq = {
.elevator_merge_fn = deadline_merge, .elevator_merge_fn = deadline_merge,
.elevator_merged_fn = deadline_merged_request, .elevator_merged_fn = deadline_merged_request,
.elevator_merge_req_fn = deadline_merged_requests, .elevator_merge_req_fn = deadline_merged_requests,
......
This diff is collapsed.
This diff is collapsed.
...@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e) ...@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e)
} }
static struct elevator_type elevator_noop = { static struct elevator_type elevator_noop = {
.ops = { .ops.sq = {
.elevator_merge_req_fn = noop_merged_requests, .elevator_merge_req_fn = noop_merged_requests,
.elevator_dispatch_fn = noop_dispatch, .elevator_dispatch_fn = noop_dispatch,
.elevator_add_req_fn = noop_add_request, .elevator_add_req_fn = noop_add_request,
......
...@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, ...@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
if (!gpt) if (!gpt)
return NULL; return NULL;
count = le32_to_cpu(gpt->num_partition_entries) * count = (size_t)le32_to_cpu(gpt->num_partition_entries) *
le32_to_cpu(gpt->sizeof_partition_entry); le32_to_cpu(gpt->sizeof_partition_entry);
if (!count) if (!count)
return NULL; return NULL;
...@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, ...@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
gpt_header **gpt, gpt_entry **ptes) gpt_header **gpt, gpt_entry **ptes)
{ {
u32 crc, origcrc; u32 crc, origcrc;
u64 lastlba; u64 lastlba, pt_size;
if (!ptes) if (!ptes)
return 0; return 0;
...@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, ...@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
goto fail; goto fail;
} }
/* Sanity check partition table size */
pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) *
le32_to_cpu((*gpt)->sizeof_partition_entry);
if (pt_size > KMALLOC_MAX_SIZE) {
pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n",
(unsigned long long)pt_size, KMALLOC_MAX_SIZE);
goto fail;
}
if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
goto fail; goto fail;
/* Check the GUID Partition Entry Array CRC */ /* Check the GUID Partition Entry Array CRC */
crc = efi_crc32((const unsigned char *) (*ptes), crc = efi_crc32((const unsigned char *) (*ptes), pt_size);
le32_to_cpu((*gpt)->num_partition_entries) *
le32_to_cpu((*gpt)->sizeof_partition_entry));
if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
pr_debug("GUID Partition Entry Array CRC check failed.\n"); pr_debug("GUID Partition Entry Array CRC check failed.\n");
......
...@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param, ...@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param,
*rcmd = NULL; *rcmd = NULL;
loop: loop:
ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL);
if (!ptr) if (!ptr)
return -ENOMEM; return -ENOMEM;
*rcmd = ptr; *rcmd = ptr;
......
...@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio, ...@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.write_prio = 0; s->iop.write_prio = 0;
s->iop.error = 0; s->iop.error = 0;
s->iop.flags = 0; s->iop.flags = 0;
s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq; s->iop.wq = bcache_wq;
return s; return s;
......
...@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) ...@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
spin_lock_irqsave(&cache->lock, flags); spin_lock_irqsave(&cache->lock, flags);
if (cache->need_tick_bio && if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
!(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
bio_op(bio) != REQ_OP_DISCARD) { bio_op(bio) != REQ_OP_DISCARD) {
pb->tick = true; pb->tick = true;
cache->need_tick_bio = false; cache->need_tick_bio = false;
...@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) ...@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
return to_oblock(block_nr); return to_oblock(block_nr);
} }
/*
 * Non-zero when @bio has REQ_PREFLUSH and/or REQ_FUA set in bi_opf.
 * The value returned is the raw masked flag bits, not a normalised 0/1.
 * @cache is not consulted.
 */
static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return (REQ_PREFLUSH | REQ_FUA) & bio->bi_opf;
}
/* /*
* You must increment the deferred set whilst the prison cell is held. To * You must increment the deferred set whilst the prison cell is held. To
* encourage this, we ask for 'cell' to be passed in. * encourage this, we ask for 'cell' to be passed in.
...@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio) ...@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio)
{ {
unsigned long flags; unsigned long flags;
if (!bio_triggers_commit(cache, bio)) { if (!op_is_flush(bio->bi_opf)) {
accounted_request(cache, bio); accounted_request(cache, bio);
return; return;
} }
...@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache) ...@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache)
static bool discard_or_flush(struct bio *bio) static bool discard_or_flush(struct bio *bio)
{ {
return bio_op(bio) == REQ_OP_DISCARD || return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
} }
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
......
...@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) ...@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
{ {
return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && return op_is_flush(bio->bi_opf) &&
dm_thin_changed_this_transaction(tc->td); dm_thin_changed_this_transaction(tc->td);
} }
...@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context, ...@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
struct bio *bio; struct bio *bio;
while ((bio = bio_list_pop(&cell->bios))) { while ((bio = bio_list_pop(&cell->bios))) {
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
bio_op(bio) == REQ_OP_DISCARD)
bio_list_add(&info->defer_bios, bio); bio_list_add(&info->defer_bios, bio);
else { else {
inc_all_io_entry(info->tc->pool, bio); inc_all_io_entry(info->tc->pool, bio);
...@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context, ...@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
struct bio *bio; struct bio *bio;
while ((bio = bio_list_pop(&cell->bios))) { while ((bio = bio_list_pop(&cell->bios))) {
if ((bio_data_dir(bio) == WRITE) || if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
(bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || bio_op(bio) == REQ_OP_DISCARD)
bio_op(bio) == REQ_OP_DISCARD))
bio_list_add(&info->defer_bios, bio); bio_list_add(&info->defer_bios, bio);
else { else {
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
...@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) ...@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED; return DM_MAPIO_SUBMITTED;
} }
if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
bio_op(bio) == REQ_OP_DISCARD) {
thin_defer_bio_with_throttle(tc, bio); thin_defer_bio_with_throttle(tc, bio);
return DM_MAPIO_SUBMITTED; return DM_MAPIO_SUBMITTED;
} }
......
...@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) ...@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev; dev->admin_tagset.driver_data = dev;
if (blk_mq_alloc_tag_set(&dev->admin_tagset)) if (blk_mq_alloc_tag_set(&dev->admin_tagset))
......
...@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx { ...@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
unsigned long flags; /* BLK_MQ_F_* flags */ unsigned long flags; /* BLK_MQ_F_* flags */
void *sched_data;
struct request_queue *queue; struct request_queue *queue;
struct blk_flush_queue *fq; struct blk_flush_queue *fq;
...@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx { ...@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
atomic_t wait_index; atomic_t wait_index;
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
struct blk_mq_tags *sched_tags;
struct srcu_struct queue_rq_srcu; struct srcu_struct queue_rq_srcu;
...@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx { ...@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx {
struct blk_mq_tag_set { struct blk_mq_tag_set {
unsigned int *mq_map; unsigned int *mq_map;
struct blk_mq_ops *ops; const struct blk_mq_ops *ops;
unsigned int nr_hw_queues; unsigned int nr_hw_queues;
unsigned int queue_depth; /* max hw supported */ unsigned int queue_depth; /* max hw supported */
unsigned int reserved_tags; unsigned int reserved_tags;
...@@ -151,11 +153,13 @@ enum { ...@@ -151,11 +153,13 @@ enum {
BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_SG_MERGE = 1 << 2,
BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_DEFER_ISSUE = 1 << 4,
BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_BLOCKING = 1 << 5,
BLK_MQ_F_NO_SCHED = 1 << 6,
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
BLK_MQ_F_ALLOC_POLICY_BITS = 1, BLK_MQ_F_ALLOC_POLICY_BITS = 1,
BLK_MQ_S_STOPPED = 0, BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_MAX_DEPTH = 10240,
...@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); ...@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_insert_request(struct request *, bool, bool, bool);
void blk_mq_free_request(struct request *rq); void blk_mq_free_request(struct request *rq);
void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *); bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
enum { enum {
BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
}; };
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
......
...@@ -220,6 +220,15 @@ static inline bool op_is_write(unsigned int op) ...@@ -220,6 +220,15 @@ static inline bool op_is_write(unsigned int op)
return (op & 1); return (op & 1);
} }
/*
 * Tell whether @op (the op/flags word of a bio or request) needs special
 * treatment in the flush state machine, i.e. has REQ_FUA or REQ_PREFLUSH
 * set.
 */
static inline bool op_is_flush(unsigned int op)
{
	return (op & (REQ_FUA | REQ_PREFLUSH)) != 0;
}
/* /*
* Reads are always treated as synchronous, as are requests with the FUA or * Reads are always treated as synchronous, as are requests with the FUA or
* PREFLUSH flag. Other operations may be marked as synchronous using the * PREFLUSH flag. Other operations may be marked as synchronous using the
...@@ -232,22 +241,29 @@ static inline bool op_is_sync(unsigned int op) ...@@ -232,22 +241,29 @@ static inline bool op_is_sync(unsigned int op)
} }
typedef unsigned int blk_qc_t; typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U #define BLK_QC_T_NONE -1U
#define BLK_QC_T_SHIFT 16 #define BLK_QC_T_SHIFT 16
#define BLK_QC_T_INTERNAL (1U << 31)
static inline bool blk_qc_t_valid(blk_qc_t cookie) static inline bool blk_qc_t_valid(blk_qc_t cookie)
{ {
return cookie != BLK_QC_T_NONE; return cookie != BLK_QC_T_NONE;
} }
static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
bool internal)
{ {
return tag | (queue_num << BLK_QC_T_SHIFT); blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
if (internal)
ret |= BLK_QC_T_INTERNAL;
return ret;
} }
static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{ {
return cookie >> BLK_QC_T_SHIFT; return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
} }
static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
...@@ -255,6 +271,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) ...@@ -255,6 +271,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
return cookie & ((1u << BLK_QC_T_SHIFT) - 1); return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
} }
/* True when @cookie has the BLK_QC_T_INTERNAL bit set. */
static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
	return !!(cookie & BLK_QC_T_INTERNAL);
}
struct blk_issue_stat { struct blk_issue_stat {
u64 time; u64 time;
}; };
......
...@@ -154,6 +154,7 @@ struct request { ...@@ -154,6 +154,7 @@ struct request {
/* the following two fields are internal, NEVER access directly */ /* the following two fields are internal, NEVER access directly */
unsigned int __data_len; /* total data len */ unsigned int __data_len; /* total data len */
int tag;
sector_t __sector; /* sector cursor */ sector_t __sector; /* sector cursor */
struct bio *bio; struct bio *bio;
...@@ -220,9 +221,10 @@ struct request { ...@@ -220,9 +221,10 @@ struct request {
unsigned short ioprio; unsigned short ioprio;
int internal_tag;
void *special; /* opaque pointer available for LLD use */ void *special; /* opaque pointer available for LLD use */
int tag;
int errors; int errors;
/* /*
...@@ -407,7 +409,7 @@ struct request_queue { ...@@ -407,7 +409,7 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed; dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn; lld_busy_fn *lld_busy_fn;
struct blk_mq_ops *mq_ops; const struct blk_mq_ops *mq_ops;
unsigned int *mq_map; unsigned int *mq_map;
...@@ -569,6 +571,11 @@ struct request_queue { ...@@ -569,6 +571,11 @@ struct request_queue {
struct list_head tag_set_list; struct list_head tag_set_list;
struct bio_set *bio_split; struct bio_set *bio_split;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_dir;
struct dentry *mq_debugfs_dir;
#endif
bool mq_sysfs_init_done; bool mq_sysfs_init_done;
}; };
...@@ -600,6 +607,7 @@ struct request_queue { ...@@ -600,6 +607,7 @@ struct request_queue {
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueable */ #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueable */
#define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */
#define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_STACKABLE) | \
...@@ -1620,6 +1628,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, ...@@ -1620,6 +1628,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
return __bvec_gap_to_prev(q, bprv, offset); return __bvec_gap_to_prev(q, bprv, offset);
} }
/*
 * Decide whether the last bvec of @prev and the first bvec of the bio that
 * follows it can be folded into a single segment.  When they can, the gap
 * between the two bios does not have to be checked, because the tail of the
 * first bio and the head of the second are handled as one segment.
 *
 * Returns true only if the pair passes BIOVEC_PHYS_MERGEABLE() and
 * BIOVEC_SEG_BOUNDARY() and the combined size stays within
 * queue_max_segment_size().
 */
static inline bool bios_segs_mergeable(struct request_queue *q,
		struct bio *prev, struct bio_vec *prev_last_bv,
		struct bio_vec *next_first_bv)
{
	/* Both bvec-pair checks must pass before the size limit matters. */
	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv) ||
	    !BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
		return false;

	/* Back segment of @prev plus the new bvec must fit in one segment. */
	return prev->bi_seg_back_size + next_first_bv->bv_len <=
			queue_max_segment_size(q);
}
static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
struct bio *next) struct bio *next)
{ {
...@@ -1629,7 +1656,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, ...@@ -1629,7 +1656,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
bio_get_last_bvec(prev, &pb); bio_get_last_bvec(prev, &pb);
bio_get_first_bvec(next, &nb); bio_get_first_bvec(next, &nb);
return __bvec_gap_to_prev(q, &pb, nb.bv_offset); if (!bios_segs_mergeable(q, prev, &pb, &nb))
return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
} }
return false; return false;
......
...@@ -77,6 +77,34 @@ struct elevator_ops ...@@ -77,6 +77,34 @@ struct elevator_ops
elevator_registered_fn *elevator_registered_fn; elevator_registered_fn *elevator_registered_fn;
}; };
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;
/*
 * Function-pointer table for an I/O scheduler (elevator) whose hooks operate
 * on blk-mq types such as struct blk_mq_hw_ctx and struct blk_mq_alloc_data.
 * struct elevator_type carries it as ops.mq, next to the struct elevator_ops
 * variant (ops.sq).
 *
 * NOTE(review): the per-group notes below are inferred from member names and
 * signatures only; confirm the exact contracts against the callers.
 */
struct elevator_mq_ops {
/* Scheduler setup / teardown; init_sched returns int (presumably 0 or -errno, TODO confirm). */
int (*init_sched)(struct request_queue *, struct elevator_type *);
void (*exit_sched)(struct elevator_queue *);
/* Merge-related hooks operating on bios and requests. */
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
void (*request_merged)(struct request_queue *, struct request *, int);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
/* Request acquisition and release; get_request is handed the blk_mq_alloc_data. */
struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
void (*put_request)(struct request *);
/* Insertion and dispatch, keyed by hardware context. */
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
/* Request state notifications (names suggest completion, start, requeue; verify). */
void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
void (*started_request)(struct request *);
void (*requeue_request)(struct request *);
/* Look up a request relative to the given one (presumably its neighbours in scheduler order; confirm). */
struct request *(*former_request)(struct request_queue *, struct request *);
struct request *(*next_request)(struct request_queue *, struct request *);
/* Hooks for per-request private data; get_rq_priv returns int (presumably an error code, TODO confirm). */
int (*get_rq_priv)(struct request_queue *, struct request *);
void (*put_rq_priv)(struct request_queue *, struct request *);
/* Init / exit hooks for a struct io_cq. */
void (*init_icq)(struct io_cq *);
void (*exit_icq)(struct io_cq *);
};
#define ELV_NAME_MAX (16) #define ELV_NAME_MAX (16)
struct elv_fs_entry { struct elv_fs_entry {
...@@ -94,12 +122,16 @@ struct elevator_type ...@@ -94,12 +122,16 @@ struct elevator_type
struct kmem_cache *icq_cache; struct kmem_cache *icq_cache;
/* fields provided by elevator implementation */ /* fields provided by elevator implementation */
struct elevator_ops ops; union {
struct elevator_ops sq;
struct elevator_mq_ops mq;
} ops;
size_t icq_size; /* see iocontext.h */ size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */ size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs; struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX]; char elevator_name[ELV_NAME_MAX];
struct module *elevator_owner; struct module *elevator_owner;
bool uses_mq;
/* managed by elevator core */ /* managed by elevator core */
char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
...@@ -123,6 +155,7 @@ struct elevator_queue ...@@ -123,6 +155,7 @@ struct elevator_queue
struct kobject kobj; struct kobject kobj;
struct mutex sysfs_lock; struct mutex sysfs_lock;
unsigned int registered:1; unsigned int registered:1;
unsigned int uses_mq:1;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS); DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
}; };
...@@ -139,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *, ...@@ -139,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_merged_request(struct request_queue *, struct request *, int);
extern void elv_bio_merged(struct request_queue *q, struct request *, extern void elv_bio_merged(struct request_queue *q, struct request *,
struct bio *); struct bio *);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
extern void elv_requeue_request(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *);
......
...@@ -258,6 +258,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) ...@@ -258,6 +258,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
unsigned int sbitmap_weight(const struct sbitmap *sb); unsigned int sbitmap_weight(const struct sbitmap *sb);
/**
* sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
* @sb: Bitmap to show.
* @m: struct seq_file to write to.
*
* This is intended for debugging. The format may change at any time.
*/
void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
/**
* sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
* seq_file.
* @sb: Bitmap to show.
* @m: struct seq_file to write to.
*
* This is intended for debugging. The output isn't guaranteed to be internally
* consistent.
*/
void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
/** /**
* sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
* memory node. * memory node.
...@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, ...@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
*/ */
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
/**
* sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
* seq_file.
* @sbq: Bitmap queue to show.
* @m: struct seq_file to write to.
*
* This is intended for debugging. The format may change at any time.
*/
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
#endif /* __LINUX_SCALE_BITMAP_H */ #endif /* __LINUX_SCALE_BITMAP_H */
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment