Commit 47db9b9a authored by Linus Torvalds

Merge tag 'for-linus-2019-09-27' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A few fixes/changes to round off this merge window. This contains:

   - Small series making some functional tweaks to blk-iocost (Tejun)

   - Elevator switch locking fix (Ming)

   - Kill redundant call in blk-wbt (Yufen)

   - Fix flush timeout handling (Yufen)"

* tag 'for-linus-2019-09-27' of git://git.kernel.dk/linux-block:
  block: fix null pointer dereference in blk_mq_rq_timed_out()
  rq-qos: get rid of redundant wbt_update_limits()
  iocost: bump up default latency targets for hard disks
  iocost: improve nr_lagging handling
  iocost: better trace vrate changes
  block: don't release queue's sysfs lock during switching elevator
  blk-mq: move lockdep_assert_held() into elevator_exit
parents d0e00bc5 8d699663
@@ -214,6 +214,16 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 
 	/* release the tag's ownership to the req cloned from */
 	spin_lock_irqsave(&fq->mq_flush_lock, flags);
+
+	if (!refcount_dec_and_test(&flush_rq->ref)) {
+		fq->rq_status = error;
+		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+		return;
+	}
+
+	if (fq->rq_status != BLK_STS_OK)
+		error = fq->rq_status;
+
 	hctx = flush_rq->mq_hctx;
 	if (!q->elevator) {
 		blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
...
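The hunk above fixes the flush-timeout race: when the timeout handler still holds a reference to the flush request, flush_end_io() only records the completion status in fq->rq_status and returns, and whoever drops the last reference finishes the flush sequence and folds the saved status back in. Below is a minimal user-space sketch of that "last reference drop completes the work" pattern; the names (flush_ctx, flush_put, saved_status) are illustrative only, and the real kernel code serializes the status update under fq->mq_flush_lock.

/* Sketch only: models refcount_dec_and_test() + fq->rq_status above. */
#include <stdatomic.h>
#include <stdio.h>

struct flush_ctx {
	atomic_int ref;          /* plays the role of flush_rq->ref */
	int        saved_status; /* plays the role of fq->rq_status */
};

/* Every path that is done with the request calls this (normal
 * completion, timeout scan, ...).  Only the final caller runs the
 * completion work; earlier callers just record their status. */
static void flush_put(struct flush_ctx *ctx, int status)
{
	if (atomic_fetch_sub(&ctx->ref, 1) != 1) {
		/* not the last reference: remember the status and bail */
		ctx->saved_status = status;
		return;
	}
	/* last reference: fold in any status recorded earlier */
	if (ctx->saved_status != 0)
		status = ctx->saved_status;
	printf("completing flush, status=%d\n", status);
}

int main(void)
{
	struct flush_ctx ctx = { .saved_status = 0 };
	atomic_init(&ctx.ref, 2);   /* e.g. completion path + timeout scan */

	flush_put(&ctx, -62);       /* timeout path reports an error       */
	flush_put(&ctx, 0);         /* last drop performs the completion   */
	return 0;
}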
@@ -529,8 +529,8 @@ struct iocg_wake_ctx {
 static const struct ioc_params autop[] = {
 	[AUTOP_HDD] = {
 		.qos = {
-			[QOS_RLAT]	= 50000, /* 50ms */
-			[QOS_WLAT]	= 50000,
+			[QOS_RLAT]	= 250000, /* 250ms */
+			[QOS_WLAT]	= 250000,
 			[QOS_MIN]	= VRATE_MIN_PPM,
 			[QOS_MAX]	= VRATE_MAX_PPM,
 		},
@@ -1343,7 +1343,7 @@ static void ioc_timer_fn(struct timer_list *timer)
 	u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
 	u32 missed_ppm[2], rq_wait_pct;
 	u64 period_vtime;
-	int i;
+	int prev_busy_level, i;
 
 	/* how were the latencies during the period? */
 	ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
@@ -1407,7 +1407,8 @@ static void ioc_timer_fn(struct timer_list *timer)
 		 * comparing vdone against period start. If lagging behind
 		 * IOs from past periods, don't increase vrate.
 		 */
-		if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
+		if ((ppm_rthr != MILLION || ppm_wthr != MILLION) &&
+		    !atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
 		    time_after64(vtime, vdone) &&
 		    time_after64(vtime, now.vnow -
 				 MAX_LAGGING_PERIODS * period_vtime) &&
@@ -1531,26 +1532,29 @@ static void ioc_timer_fn(struct timer_list *timer)
 	 * and experiencing shortages but not surpluses, we're too stingy
 	 * and should increase vtime rate.
 	 */
+	prev_busy_level = ioc->busy_level;
 	if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
 	    missed_ppm[READ] > ppm_rthr ||
 	    missed_ppm[WRITE] > ppm_wthr) {
 		ioc->busy_level = max(ioc->busy_level, 0);
 		ioc->busy_level++;
-	} else if (nr_lagging) {
-		ioc->busy_level = max(ioc->busy_level, 0);
-	} else if (nr_shortages && !nr_surpluses &&
-		   rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
+	} else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
 		   missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
 		   missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
-		ioc->busy_level = min(ioc->busy_level, 0);
-		ioc->busy_level--;
+		/* take action iff there is contention */
+		if (nr_shortages && !nr_lagging) {
+			ioc->busy_level = min(ioc->busy_level, 0);
+			/* redistribute surpluses first */
+			if (!nr_surpluses)
+				ioc->busy_level--;
+		}
 	} else {
 		ioc->busy_level = 0;
 	}
 
 	ioc->busy_level = clamp(ioc->busy_level, -1000, 1000);
 
-	if (ioc->busy_level) {
+	if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) {
 		u64 vrate = atomic64_read(&ioc->vtime_rate);
 		u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max;
@@ -1592,6 +1596,10 @@ static void ioc_timer_fn(struct timer_list *timer)
 		atomic64_set(&ioc->vtime_rate, vrate);
 		ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP(
 			ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
+	} else if (ioc->busy_level != prev_busy_level || nr_lagging) {
+		trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
+					   &missed_ppm, rq_wait_pct, nr_lagging,
+					   nr_shortages, nr_surpluses);
 	}
 
 	ioc_refresh_params(ioc, false);
...
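Taken together, the iocost hunks above change how ioc_timer_fn() reacts at the end of each period: busy_level only goes negative (asking for a higher vtime rate) when there is real contention, i.e. shortages without lagging cgroups and after surpluses have been redistributed, the rate is only raised while nothing is lagging, and periods where no adjustment is made but something interesting happened are still traced. A rough, self-contained C sketch of that decision follows; the types, thresholds and the fixed 1000 step are simplifications for illustration, not the kernel's actual parameters.

/* Simplified model of the busy_level / vrate decision above. */
#include <stdbool.h>
#include <stdio.h>

struct ioc_state {
	int           busy_level; /* >0: device too busy, <0: too idle */
	unsigned long vrate;      /* current vtime rate                */
};

static void period_end(struct ioc_state *ioc, bool missed_lat,
		       bool well_under_lat, int nr_lagging,
		       int nr_shortages, int nr_surpluses)
{
	int prev_busy_level = ioc->busy_level;

	if (missed_lat) {
		/* latency or queue-wait targets missed: device is too busy */
		if (ioc->busy_level < 0)
			ioc->busy_level = 0;
		ioc->busy_level++;
	} else if (well_under_lat) {
		/* comfortably under the targets: act only on contention */
		if (nr_shortages && !nr_lagging) {
			if (ioc->busy_level > 0)
				ioc->busy_level = 0;
			/* redistribute surpluses before speeding up */
			if (!nr_surpluses)
				ioc->busy_level--;
		}
	} else {
		ioc->busy_level = 0;
	}

	/* slow down when busy; speed up only if nothing is lagging */
	if (ioc->busy_level > 0)
		ioc->vrate -= 1000;
	else if (ioc->busy_level < 0 && !nr_lagging)
		ioc->vrate += 1000;
	else if (ioc->busy_level != prev_busy_level || nr_lagging)
		printf("trace: vrate=%lu busy=%d lagging=%d\n",
		       ioc->vrate, ioc->busy_level, nr_lagging);
}

int main(void)
{
	struct ioc_state ioc = { .busy_level = 0, .vrate = 100000 };

	period_end(&ioc, true,  false, 0, 0, 0); /* missed targets     */
	period_end(&ioc, false, true,  0, 2, 0); /* contention, no lag */
	printf("vrate=%lu busy_level=%d\n", ioc.vrate, ioc.busy_level);
	return 0;
}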
@@ -555,8 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	lockdep_assert_held(&q->sysfs_lock);
-
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags)
 			blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
...
@@ -918,7 +918,10 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	 */
 	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
 
-	if (refcount_dec_and_test(&rq->ref))
+	if (is_flush_rq(rq, hctx))
+		rq->end_io(rq, 0);
+	else if (refcount_dec_and_test(&rq->ref))
 		__blk_mq_free_request(rq);
+
 	return true;
...
@@ -482,7 +482,6 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 	blk_mq_quiesce_queue(q);
 
 	wbt_set_min_lat(q, val);
-	wbt_update_limits(q);
 
 	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
@@ -989,13 +988,11 @@ int blk_register_queue(struct gendisk *disk)
 		blk_mq_debugfs_register(q);
 	}
 
-	/*
-	 * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
-	 * switch won't happen at all.
-	 */
+	mutex_lock(&q->sysfs_lock);
 	if (q->elevator) {
 		ret = elv_register_queue(q, false);
 		if (ret) {
+			mutex_unlock(&q->sysfs_lock);
 			mutex_unlock(&q->sysfs_dir_lock);
 			kobject_del(&q->kobj);
 			blk_trace_remove_sysfs(dev);
@@ -1005,7 +1002,6 @@ int blk_register_queue(struct gendisk *disk)
 		has_elevator = true;
 	}
 
-	mutex_lock(&q->sysfs_lock);
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
 	wbt_enable_default(q);
 	blk_throtl_register_queue(q);
@@ -1062,12 +1058,10 @@ void blk_unregister_queue(struct gendisk *disk)
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	/*
-	 * q->kobj has been removed, so it is safe to check if elevator
-	 * exists without holding q->sysfs_lock.
-	 */
+	mutex_lock(&q->sysfs_lock);
 	if (q->elevator)
 		elv_unregister_queue(q);
+	mutex_unlock(&q->sysfs_lock);
 
 	mutex_unlock(&q->sysfs_dir_lock);
 	kobject_put(&disk_to_dev(disk)->kobj);
...
@@ -19,6 +19,7 @@ struct blk_flush_queue {
 	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
+	blk_status_t		rq_status;
 	unsigned long		flush_pending_since;
 	struct list_head	flush_queue[2];
 	struct list_head	flush_data_in_flight;
@@ -47,6 +48,12 @@ static inline void __blk_get_queue(struct request_queue *q)
 	kobject_get(&q->kobj);
 }
 
+static inline bool
+is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
+{
+	return hctx->fq->flush_rq == req;
+}
+
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
 		int node, int cmd_size, gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
@@ -194,6 +201,8 @@ void elv_unregister_queue(struct request_queue *q);
 static inline void elevator_exit(struct request_queue *q,
 		struct elevator_queue *e)
 {
+	lockdep_assert_held(&q->sysfs_lock);
+
 	blk_mq_sched_free_requests(q);
 	__elevator_exit(q, e);
 }
...
@@ -503,9 +503,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
 		if (uevent)
 			kobject_uevent(&e->kobj, KOBJ_ADD);
 
-		mutex_lock(&q->sysfs_lock);
 		e->registered = 1;
-		mutex_unlock(&q->sysfs_lock);
 	}
 	return error;
 }
@@ -523,11 +521,9 @@ void elv_unregister_queue(struct request_queue *q)
 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
 		kobject_del(&e->kobj);
 
-		mutex_lock(&q->sysfs_lock);
 		e->registered = 0;
 		/* Re-enable throttling in case elevator disabled it */
 		wbt_enable_default(q);
-		mutex_unlock(&q->sysfs_lock);
 	}
 }
@@ -590,32 +586,11 @@ int elevator_switch_mq(struct request_queue *q,
 	lockdep_assert_held(&q->sysfs_lock);
 
 	if (q->elevator) {
-		if (q->elevator->registered) {
-			mutex_unlock(&q->sysfs_lock);
-
-			/*
-			 * Concurrent elevator switch can't happen becasue
-			 * sysfs write is always exclusively on same file.
-			 *
-			 * Also the elevator queue won't be freed after
-			 * sysfs_lock is released becasue kobject_del() in
-			 * blk_unregister_queue() waits for completion of
-			 * .store & .show on its attributes.
-			 */
+		if (q->elevator->registered)
 			elv_unregister_queue(q);
-			mutex_lock(&q->sysfs_lock);
-		}
 
 		ioc_clear_queue(q);
 		elevator_exit(q, q->elevator);
-
-		/*
-		 * sysfs_lock may be dropped, so re-check if queue is
-		 * unregistered. If yes, don't switch to new elevator
-		 * any more
-		 */
-		if (!blk_queue_registered(q))
-			return 0;
 	}
 
 	ret = blk_mq_init_sched(q, new_e);
@@ -623,11 +598,7 @@ int elevator_switch_mq(struct request_queue *q,
 		goto out;
 
 	if (new_e) {
-		mutex_unlock(&q->sysfs_lock);
-
 		ret = elv_register_queue(q, true);
-		mutex_lock(&q->sysfs_lock);
-
 		if (ret) {
 			elevator_exit(q, q->elevator);
 			goto out;
...
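The blk-sysfs.c and elevator.c hunks above are one logical change: q->sysfs_lock is now taken by the callers around elv_register_queue()/elv_unregister_queue(), so elevator_switch_mq() no longer drops and re-takes the lock mid-switch, and the re-check of blk_queue_registered() after re-locking disappears. The general hazard being removed, releasing a lock in the middle of an operation so the protected state can change underneath you, can be sketched with plain pthreads; the names below (struct queue, switch_elevator, ...) are made up for the example and are not the block layer's API.

/* Illustrative sketch of why the switch path now keeps the lock held. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
	pthread_mutex_t sysfs_lock;
	bool registered;
	const char *elevator;
};

/* Old pattern: unlock around the sysfs work, then re-validate because
 * another thread may have unregistered the queue in the meantime. */
static int switch_elevator_racy(struct queue *q, const char *new_e)
{
	pthread_mutex_lock(&q->sysfs_lock);
	pthread_mutex_unlock(&q->sysfs_lock);    /* sysfs work done unlocked */
	printf("unregistering old elevator %s\n", q->elevator);
	pthread_mutex_lock(&q->sysfs_lock);
	if (!q->registered) {                    /* must re-check the state */
		pthread_mutex_unlock(&q->sysfs_lock);
		return 0;
	}
	q->elevator = new_e;
	pthread_mutex_unlock(&q->sysfs_lock);
	return 0;
}

/* New pattern: the whole switch runs under the lock, so the state seen
 * at the start is still valid when the new elevator is installed. */
static int switch_elevator(struct queue *q, const char *new_e)
{
	pthread_mutex_lock(&q->sysfs_lock);
	printf("unregistering old elevator %s\n", q->elevator);
	q->elevator = new_e;
	pthread_mutex_unlock(&q->sysfs_lock);
	return 0;
}

int main(void)
{
	struct queue q = { PTHREAD_MUTEX_INITIALIZER, true, "mq-deadline" };

	switch_elevator_racy(&q, "bfq");
	switch_elevator(&q, "kyber");
	printf("elevator is now %s\n", q.elevator);
	return 0;
}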