Commit e1b2324d authored by Arianna Avanzini, committed by Jens Axboe

block, bfq: handle bursts of queue activations

Many popular I/O-intensive services or applications spawn or
reactivate many parallel threads/processes during short time
intervals. Examples are systemd during boot or git grep.  These
services or applications benefit mostly from a high throughput: the
quicker the I/O generated by their processes is cumulatively served,
the sooner the target job of these services or applications gets
completed. As a consequence, it is almost always counterproductive to
weight-raise any of the queues associated with the processes of these
services or applications: in most cases it would just lower the
throughput, mainly because weight-raising also implies device idling.

To address this issue, an I/O scheduler needs, first, to detect which
queues are associated with these services or applications. In this
respect, we have that, from the I/O-scheduler standpoint, these
services or applications cause bursts of activations, i.e.,
activations of different queues occurring shortly after each
other. However, a shorter burst of activations may be caused also by
the start of an application that does not consist of a lot of parallel
I/O-bound threads (see the comments on the function bfq_handle_burst
for details).

In view of these facts, this commit introduces:
1) a heuristic to detect (only) bursts of queue activations caused by
   services or applications consisting of many parallel I/O-bound
   threads;
2) the prevention of device idling and weight-raising for the queues
   belonging to these bursts.
Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent e01eff01
...@@ -360,6 +360,10 @@ struct bfq_queue { ...@@ -360,6 +360,10 @@ struct bfq_queue {
/* bit vector: a 1 for each seeky requests in history */ /* bit vector: a 1 for each seeky requests in history */
u32 seek_history; u32 seek_history;
/* node for the device's burst list */
struct hlist_node burst_list_node;
/* position of the last request enqueued */ /* position of the last request enqueued */
sector_t last_request_pos; sector_t last_request_pos;
...@@ -442,6 +446,17 @@ struct bfq_io_cq { ...@@ -442,6 +446,17 @@ struct bfq_io_cq {
*/ */
bool saved_IO_bound; bool saved_IO_bound;
/*
* Same purpose as the previous fields for the value of the
* field keeping the queue's belonging to a large burst
*/
bool saved_in_large_burst;
/*
* True if the queue belonged to a burst list before its merge
* with another cooperating queue.
*/
bool was_in_burst_list;
/* /*
* Similar to previous fields: save wr information. * Similar to previous fields: save wr information.
*/ */
...@@ -609,6 +624,36 @@ struct bfq_data { ...@@ -609,6 +624,36 @@ struct bfq_data {
*/ */
bool strict_guarantees; bool strict_guarantees;
/*
* Last time at which a queue entered the current burst of
* queues being activated shortly after each other; for more
* details about this and the following parameters related to
* a burst of activations, see the comments on the function
* bfq_handle_burst.
*/
unsigned long last_ins_in_burst;
/*
* Reference time interval used to decide whether a queue has
* been activated shortly after @last_ins_in_burst.
*/
unsigned long bfq_burst_interval;
/* number of queues in the current burst of queue activations */
int burst_size;
/* common parent entity for the queues in the burst */
struct bfq_entity *burst_parent_entity;
/* Maximum burst size above which the current queue-activation
* burst is deemed as 'large'.
*/
unsigned long bfq_large_burst_thresh;
/* true if a large queue-activation burst is in progress */
bool large_burst;
/*
* Head of the burst list (as for the above fields, more
* details in the comments on the function bfq_handle_burst).
*/
struct hlist_head burst_list;
/* if set to true, low-latency heuristics are enabled */ /* if set to true, low-latency heuristics are enabled */
bool low_latency; bool low_latency;
/* /*
...@@ -671,7 +716,8 @@ struct bfq_data { ...@@ -671,7 +716,8 @@ struct bfq_data {
}; };
enum bfqq_state_flags { enum bfqq_state_flags {
BFQQF_busy = 0, /* has requests or is in service */ BFQQF_just_created = 0, /* queue just allocated */
BFQQF_busy, /* has requests or is in service */
BFQQF_wait_request, /* waiting for a request */ BFQQF_wait_request, /* waiting for a request */
BFQQF_non_blocking_wait_rq, /* BFQQF_non_blocking_wait_rq, /*
* waiting for a request * waiting for a request
...@@ -685,6 +731,10 @@ enum bfqq_state_flags { ...@@ -685,6 +731,10 @@ enum bfqq_state_flags {
* having consumed at most 2/10 of * having consumed at most 2/10 of
* its budget * its budget
*/ */
BFQQF_in_large_burst, /*
* bfqq activated in a large burst,
* see comments to bfq_handle_burst.
*/
BFQQF_softrt_update, /* BFQQF_softrt_update, /*
* may need softrt-next-start * may need softrt-next-start
* update * update
...@@ -707,6 +757,7 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ ...@@ -707,6 +757,7 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
return test_bit(BFQQF_##name, &(bfqq)->flags); \ return test_bit(BFQQF_##name, &(bfqq)->flags); \
} }
BFQ_BFQQ_FNS(just_created);
BFQ_BFQQ_FNS(busy); BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request); BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq); BFQ_BFQQ_FNS(non_blocking_wait_rq);
...@@ -714,6 +765,7 @@ BFQ_BFQQ_FNS(fifo_expire); ...@@ -714,6 +765,7 @@ BFQ_BFQQ_FNS(fifo_expire);
BFQ_BFQQ_FNS(idle_window); BFQ_BFQQ_FNS(idle_window);
BFQ_BFQQ_FNS(sync); BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound); BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update); BFQ_BFQQ_FNS(softrt_update);
...@@ -4303,9 +4355,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ...@@ -4303,9 +4355,9 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish;
bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time;
if (bfqq->wr_coeff > 1 && if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
time_is_before_jiffies(bfqq->last_wr_start_finish + time_is_before_jiffies(bfqq->last_wr_start_finish +
bfqq->wr_cur_max_time)) { bfqq->wr_cur_max_time))) {
bfq_log_bfqq(bfqq->bfqd, bfqq, bfq_log_bfqq(bfqq->bfqd, bfqq,
"resume state: switching off wr"); "resume state: switching off wr");
...@@ -4321,6 +4373,232 @@ static int bfqq_process_refs(struct bfq_queue *bfqq) ...@@ -4321,6 +4373,232 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st; return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
} }
/* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct bfq_queue *item;
struct hlist_node *n;
hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
hlist_del_init(&item->burst_list_node);
hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
bfqd->burst_size = 1;
bfqd->burst_parent_entity = bfqq->entity.parent;
}
/* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */
static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
/* Increment burst size to take into account also bfqq */
bfqd->burst_size++;
if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) {
struct bfq_queue *pos, *bfqq_item;
struct hlist_node *n;
/*
* Enough queues have been activated shortly after each
* other to consider this burst as large.
*/
bfqd->large_burst = true;
/*
* We can now mark all queues in the burst list as
* belonging to a large burst.
*/
hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
burst_list_node)
bfq_mark_bfqq_in_large_burst(bfqq_item);
bfq_mark_bfqq_in_large_burst(bfqq);
/*
* From now on, and until the current burst finishes, any
* new queue being activated shortly after the last queue
* was inserted in the burst can be immediately marked as
* belonging to a large burst. So the burst list is not
* needed any more. Remove it.
*/
hlist_for_each_entry_safe(pos, n, &bfqd->burst_list,
burst_list_node)
hlist_del_init(&pos->burst_list_node);
} else /*
* Burst not yet large: add bfqq to the burst list. Do
* not increment the ref counter for bfqq, because bfqq
* is removed from the burst list before freeing bfqq
* in put_queue.
*/
hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
}
/*
* If many queues belonging to the same group happen to be created
* shortly after each other, then the processes associated with these
* queues have typically a common goal. In particular, bursts of queue
* creations are usually caused by services or applications that spawn
* many parallel threads/processes. Examples are systemd during boot,
* or git grep. To help these processes get their job done as soon as
* possible, it is usually better to not grant either weight-raising
* or device idling to their queues.
*
* In this comment we describe, firstly, the reasons why this fact
* holds, and, secondly, the next function, which implements the main
* steps needed to properly mark these queues so that they can then be
* treated in a different way.
*
* The above services or applications benefit mostly from a high
* throughput: the quicker the requests of the activated queues are
* cumulatively served, the sooner the target job of these queues gets
* completed. As a consequence, weight-raising any of these queues,
* which also implies idling the device for it, is almost always
* counterproductive. In most cases it just lowers throughput.
*
* On the other hand, a burst of queue creations may be caused also by
* the start of an application that does not consist of a lot of
* parallel I/O-bound threads. In fact, with a complex application,
* several short processes may need to be executed to start-up the
* application. In this respect, to start an application as quickly as
* possible, the best thing to do is in any case to privilege the I/O
* related to the application with respect to all other
* I/O. Therefore, the best strategy to start as quickly as possible
* an application that causes a burst of queue creations is to
* weight-raise all the queues created during the burst. This is the
* exact opposite of the best strategy for the other type of bursts.
*
* In the end, to take the best action for each of the two cases, the
* two types of bursts need to be distinguished. Fortunately, this
* seems relatively easy, by looking at the sizes of the bursts. In
* particular, we found a threshold such that only bursts with a
* larger size than that threshold are apparently caused by
* services or commands such as systemd or git grep. For brevity,
* hereafter we call just 'large' these bursts. BFQ *does not*
* weight-raise queues whose creation occurs in a large burst. In
* addition, for each of these queues BFQ performs or does not perform
* idling depending on which choice boosts the throughput more. The
* exact choice depends on the device and request pattern at
* hand.
*
* Unfortunately, false positives may occur while an interactive task
* is starting (e.g., an application is being started). The
* consequence is that the queues associated with the task do not
* enjoy weight raising as expected. Fortunately these false positives
* are very rare. They typically occur if some service happens to
* start doing I/O exactly when the interactive task starts.
*
* Turning back to the next function, it implements all the steps
* needed to detect the occurrence of a large burst and to properly
* mark all the queues belonging to it (so that they can then be
* treated in a different way). This goal is achieved by maintaining a
* "burst list" that holds, temporarily, the queues that belong to the
* burst in progress. The list is then used to mark these queues as
* belonging to a large burst if the burst does become large. The main
* steps are the following.
*
* . when the very first queue is created, the queue is inserted into the
* list (as it could be the first queue in a possible burst)
*
* . if the current burst has not yet become large, and a queue Q that does
* not yet belong to the burst is activated shortly after the last time
* at which a new queue entered the burst list, then the function appends
* Q to the burst list
*
* . if, as a consequence of the previous step, the burst size reaches
* the large-burst threshold, then
*
* . all the queues in the burst list are marked as belonging to a
* large burst
*
* . the burst list is deleted; in fact, the burst list already served
* its purpose (keeping temporarily track of the queues in a burst,
* so as to be able to mark them as belonging to a large burst in the
* previous sub-step), and now is not needed any more
*
* . the device enters a large-burst mode
*
* . if a queue Q that does not belong to the burst is created while
* the device is in large-burst mode and shortly after the last time
* at which a queue either entered the burst list or was marked as
* belonging to the current large burst, then Q is immediately marked
* as belonging to a large burst.
*
* . if a queue Q that does not belong to the burst is created a while
* later, i.e., not shortly after, than the last time at which a queue
* either entered the burst list or was marked as belonging to the
* current large burst, then the current burst is deemed as finished and:
*
* . the large-burst mode is reset if set
*
* . the burst list is emptied
*
* . Q is inserted in the burst list, as Q may be the first queue
* in a possible new burst (then the burst list contains just Q
* after this step).
*/
static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
	/* Nothing to do if bfqq already sits in the burst list ... */
	if (!hlist_unhashed(&bfqq->burst_list_node))
		return;

	/* ... or is already marked as part of a large burst ... */
	if (bfq_bfqq_in_large_burst(bfqq))
		return;

	/*
	 * ... or has just been split (split_time plus 10 ms has not
	 * yet passed).
	 */
	if (time_is_after_eq_jiffies(bfqq->split_time +
				     msecs_to_jiffies(10)))
		return;

	if (time_is_before_jiffies(bfqd->last_ins_in_burst +
				   bfqd->bfq_burst_interval) ||
	    bfqq->entity.parent != bfqd->burst_parent_entity) {
		/*
		 * bfqq shows up too late after the last insertion, or
		 * belongs to a different group than the burst group:
		 * the current burst is over, and a possible new burst
		 * starts with bfqq as its only queue.
		 *
		 * Special case: for the very first queue created after
		 * BFQ is selected for this device, last_ins_in_burst
		 * and burst_parent_entity are not yet significant.
		 * Whichever branch of this chain is taken, bfqq ends
		 * up alone in the burst list, which is what has to
		 * happen, as bfqq may be the first queue of the first
		 * burst.
		 */
		bfqd->large_burst = false;
		bfq_reset_burst_list(bfqd, bfqq);
	} else if (bfqd->large_burst) {
		/*
		 * bfqq is being activated shortly after the last queue
		 * and the burst is already large: mark bfqq
		 * immediately.
		 */
		bfq_mark_bfqq_in_large_burst(bfqq);
	} else {
		/*
		 * bfqq is being activated shortly after the last
		 * queue, but the large-burst state has not been
		 * reached yet: add bfqq to the burst.
		 */
		bfq_add_to_burst(bfqd, bfqq);
	}

	/*
	 * bfqq has either joined the current burst or terminated it
	 * and become the first queue of a possible new one. In both
	 * cases the time of the last insertion moves forward.
	 */
	bfqd->last_ins_in_burst = jiffies;
}
static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
{ {
struct bfq_entity *entity = &bfqq->entity; struct bfq_entity *entity = &bfqq->entity;
...@@ -4534,6 +4812,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, ...@@ -4534,6 +4812,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
unsigned int old_wr_coeff, unsigned int old_wr_coeff,
bool wr_or_deserves_wr, bool wr_or_deserves_wr,
bool interactive, bool interactive,
bool in_burst,
bool soft_rt) bool soft_rt)
{ {
if (old_wr_coeff == 1 && wr_or_deserves_wr) { if (old_wr_coeff == 1 && wr_or_deserves_wr) {
...@@ -4565,7 +4844,9 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, ...@@ -4565,7 +4844,9 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
if (interactive) { /* update wr coeff and duration */ if (interactive) { /* update wr coeff and duration */
bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_coeff = bfqd->bfq_wr_coeff;
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
} else if (soft_rt) { } else if (in_burst)
bfqq->wr_coeff = 1;
else if (soft_rt) {
/* /*
* The application is now or still meeting the * The application is now or still meeting the
* requirements for being deemed soft rt. We * requirements for being deemed soft rt. We
...@@ -4625,7 +4906,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, ...@@ -4625,7 +4906,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
struct request *rq, struct request *rq,
bool *interactive) bool *interactive)
{ {
bool soft_rt, wr_or_deserves_wr, bfqq_wants_to_preempt, bool soft_rt, in_burst, wr_or_deserves_wr,
bfqq_wants_to_preempt,
idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq), idle_for_long_time = bfq_bfqq_idle_for_long_time(bfqd, bfqq),
/* /*
* See the comments on * See the comments on
...@@ -4641,12 +4923,15 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, ...@@ -4641,12 +4923,15 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
/* /*
* bfqq deserves to be weight-raised if: * bfqq deserves to be weight-raised if:
* - it is sync, * - it is sync,
* - it does not belong to a large burst,
* - it has been idle for enough time or is soft real-time, * - it has been idle for enough time or is soft real-time,
* - is linked to a bfq_io_cq (it is not shared in any sense). * - is linked to a bfq_io_cq (it is not shared in any sense).
*/ */
in_burst = bfq_bfqq_in_large_burst(bfqq);
soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
!in_burst &&
time_is_before_jiffies(bfqq->soft_rt_next_start); time_is_before_jiffies(bfqq->soft_rt_next_start);
*interactive = idle_for_long_time; *interactive = !in_burst && idle_for_long_time;
wr_or_deserves_wr = bfqd->low_latency && wr_or_deserves_wr = bfqd->low_latency &&
(bfqq->wr_coeff > 1 || (bfqq->wr_coeff > 1 ||
(bfq_bfqq_sync(bfqq) && (bfq_bfqq_sync(bfqq) &&
...@@ -4661,6 +4946,31 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, ...@@ -4661,6 +4946,31 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
arrived_in_time, arrived_in_time,
wr_or_deserves_wr); wr_or_deserves_wr);
/*
* If bfqq happened to be activated in a burst, but has been
* idle for much more than an interactive queue, then we
* assume that, in the overall I/O initiated in the burst, the
* I/O associated with bfqq is finished. So bfqq does not need
* to be treated as a queue belonging to a burst
* anymore. Accordingly, we reset bfqq's in_large_burst flag
* if set, and remove bfqq from the burst list if it's
* there. We do not decrement burst_size, because the fact
* that bfqq does not need to belong to the burst list any
* more does not invalidate the fact that bfqq was created in
* a burst.
*/
if (likely(!bfq_bfqq_just_created(bfqq)) &&
idle_for_long_time &&
time_is_before_jiffies(
bfqq->budget_timeout +
msecs_to_jiffies(10000))) {
hlist_del_init(&bfqq->burst_list_node);
bfq_clear_bfqq_in_large_burst(bfqq);
}
bfq_clear_bfqq_just_created(bfqq);
if (!bfq_bfqq_IO_bound(bfqq)) { if (!bfq_bfqq_IO_bound(bfqq)) {
if (arrived_in_time) { if (arrived_in_time) {
bfqq->requests_within_timer++; bfqq->requests_within_timer++;
...@@ -4683,6 +4993,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, ...@@ -4683,6 +4993,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
old_wr_coeff, old_wr_coeff,
wr_or_deserves_wr, wr_or_deserves_wr,
*interactive, *interactive,
in_burst,
soft_rt); soft_rt);
if (old_wr_coeff != bfqq->wr_coeff) if (old_wr_coeff != bfqq->wr_coeff)
...@@ -5310,6 +5621,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) ...@@ -5310,6 +5621,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
bic->saved_ttime = bfqq->ttime; bic->saved_ttime = bfqq->ttime;
bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
bic->saved_wr_coeff = bfqq->wr_coeff; bic->saved_wr_coeff = bfqq->wr_coeff;
bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
...@@ -5345,7 +5658,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, ...@@ -5345,7 +5658,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
* where bfqq has just been created, but has not yet made it * where bfqq has just been created, but has not yet made it
* to be weight-raised (which may happen because EQM may merge * to be weight-raised (which may happen because EQM may merge
* bfqq even before bfq_add_request is executed for the first * bfqq even before bfq_add_request is executed for the first
* time for bfqq). * time for bfqq). Handling this case would however be very
* easy, thanks to the flag just_created.
*/ */
if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) { if (new_bfqq->wr_coeff == 1 && bfqq->wr_coeff > 1) {
new_bfqq->wr_coeff = bfqq->wr_coeff; new_bfqq->wr_coeff = bfqq->wr_coeff;
...@@ -6430,6 +6744,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) ...@@ -6430,6 +6744,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{ {
struct bfq_data *bfqd = bfqq->bfqd; struct bfq_data *bfqd = bfqq->bfqd;
bool idling_boosts_thr, idling_boosts_thr_without_issues, bool idling_boosts_thr, idling_boosts_thr_without_issues,
idling_needed_for_service_guarantees,
asymmetric_scenario; asymmetric_scenario;
if (bfqd->strict_guarantees) if (bfqd->strict_guarantees)
...@@ -6609,6 +6924,23 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) ...@@ -6609,6 +6924,23 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
asymmetric_scenario = bfqq->wr_coeff > 1 || asymmetric_scenario = bfqq->wr_coeff > 1 ||
!bfq_symmetric_scenario(bfqd); !bfq_symmetric_scenario(bfqd);
/*
* Finally, there is a case where maximizing throughput is the
* best choice even if it may cause unfairness toward
* bfqq. Such a case is when bfqq became active in a burst of
* queue activations. Queues that became active during a large
* burst benefit only from throughput, as discussed in the
* comments on bfq_handle_burst. Thus, if bfqq became active
* in a burst and not idling the device maximizes throughput,
* then the device must not be idled, because not idling the
* device provides bfqq and all other queues in the burst with
* maximum benefit. Combining this and the above case, we can
* now establish when idling is actually needed to preserve
* service guarantees.
*/
idling_needed_for_service_guarantees =
asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
/* /*
* We have now all the components we need to compute the return * We have now all the components we need to compute the return
* value of the function, which is true only if both the following * value of the function, which is true only if both the following
...@@ -6618,7 +6950,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) ...@@ -6618,7 +6950,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* is necessary to preserve service guarantees. * is necessary to preserve service guarantees.
*/ */
return bfq_bfqq_sync(bfqq) && return bfq_bfqq_sync(bfqq) &&
(idling_boosts_thr_without_issues || asymmetric_scenario); (idling_boosts_thr_without_issues ||
idling_needed_for_service_guarantees);
} }
/* /*
...@@ -6757,14 +7090,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) ...@@ -6757,14 +7090,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
/* /*
* If too much time has elapsed from the beginning of * If the queue was activated in a burst, or too much
* this weight-raising period, then end weight raising. * time has elapsed from the beginning of this
* weight-raising period, then end weight raising.
*/ */
if (time_is_before_jiffies(bfqq->last_wr_start_finish + if (bfq_bfqq_in_large_burst(bfqq))
bfqq->wr_cur_max_time)) { bfq_bfqq_end_wr(bfqq);
else if (time_is_before_jiffies(bfqq->last_wr_start_finish +
bfqq->wr_cur_max_time)) {
if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time || if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time ||
time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt + time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt +
bfq_wr_duration(bfqd))) bfq_wr_duration(bfqd)))
bfq_bfqq_end_wr(bfqq); bfq_bfqq_end_wr(bfqq);
else { else {
/* switch back to interactive wr */ /* switch back to interactive wr */
...@@ -6962,7 +7298,16 @@ static void bfq_put_queue(struct bfq_queue *bfqq) ...@@ -6962,7 +7298,16 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
if (bfqq->ref) if (bfqq->ref)
return; return;
bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p freed", bfqq); if (bfq_bfqq_sync(bfqq))
/*
* The fact that this queue is being destroyed does not
* invalidate the fact that this queue may have been
* activated during the current burst. As a consequence,
* although the queue does not exist anymore, and hence
* needs to be removed from the burst list if there,
* the burst size has not to be decremented.
*/
hlist_del_init(&bfqq->burst_list_node);
kmem_cache_free(bfq_pool, bfqq); kmem_cache_free(bfq_pool, bfqq);
#ifdef CONFIG_BFQ_GROUP_IOSCHED #ifdef CONFIG_BFQ_GROUP_IOSCHED
...@@ -7124,6 +7469,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ...@@ -7124,6 +7469,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{ {
RB_CLEAR_NODE(&bfqq->entity.rb_node); RB_CLEAR_NODE(&bfqq->entity.rb_node);
INIT_LIST_HEAD(&bfqq->fifo); INIT_LIST_HEAD(&bfqq->fifo);
INIT_HLIST_NODE(&bfqq->burst_list_node);
bfqq->ref = 0; bfqq->ref = 0;
bfqq->bfqd = bfqd; bfqq->bfqd = bfqd;
...@@ -7135,6 +7481,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, ...@@ -7135,6 +7481,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfq_class_idle(bfqq)) if (!bfq_class_idle(bfqq))
bfq_mark_bfqq_idle_window(bfqq); bfq_mark_bfqq_idle_window(bfqq);
bfq_mark_bfqq_sync(bfqq); bfq_mark_bfqq_sync(bfqq);
bfq_mark_bfqq_just_created(bfqq);
} else } else
bfq_clear_bfqq_sync(bfqq); bfq_clear_bfqq_sync(bfqq);
...@@ -7400,6 +7747,7 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) ...@@ -7400,6 +7747,7 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
new_bfqq->allocated++; new_bfqq->allocated++;
bfqq->allocated--; bfqq->allocated--;
new_bfqq->ref++; new_bfqq->ref++;
bfq_clear_bfqq_just_created(bfqq);
/* /*
* If the bic associated with the process * If the bic associated with the process
* issuing this request still points to bfqq * issuing this request still points to bfqq
...@@ -7680,8 +8028,18 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, ...@@ -7680,8 +8028,18 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
bfqq = bfq_get_queue(bfqd, bio, is_sync, bic); bfqq = bfq_get_queue(bfqd, bio, is_sync, bic);
bic_set_bfqq(bic, bfqq, is_sync); bic_set_bfqq(bic, bfqq, is_sync);
if (split && is_sync) if (split && is_sync) {
if ((bic->was_in_burst_list && bfqd->large_burst) ||
bic->saved_in_large_burst)
bfq_mark_bfqq_in_large_burst(bfqq);
else {
bfq_clear_bfqq_in_large_burst(bfqq);
if (bic->was_in_burst_list)
hlist_add_head(&bfqq->burst_list_node,
&bfqd->burst_list);
}
bfqq->split_time = jiffies; bfqq->split_time = jiffies;
}
return bfqq; return bfqq;
} }
...@@ -7714,6 +8072,11 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, ...@@ -7714,6 +8072,11 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
/* If the queue was seeky for too long, break it apart. */ /* If the queue was seeky for too long, break it apart. */
if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) {
bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq");
/* Update bic before losing reference to bfqq */
if (bfq_bfqq_in_large_burst(bfqq))
bic->saved_in_large_burst = true;
bfqq = bfq_split_bfqq(bic, bfqq); bfqq = bfq_split_bfqq(bic, bfqq);
/* /*
* A reference to bic->icq.ioc needs to be * A reference to bic->icq.ioc needs to be
...@@ -7757,6 +8120,9 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq, ...@@ -7757,6 +8120,9 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
} }
} }
if (unlikely(bfq_bfqq_just_created(bfqq)))
bfq_handle_burst(bfqd, bfqq);
bfq_unlock_put_ioc(bfqd); bfq_unlock_put_ioc(bfqd);
return 0; return 0;
...@@ -7936,6 +8302,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) ...@@ -7936,6 +8302,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE; bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
bfqd->oom_bfqq.entity.new_weight = bfqd->oom_bfqq.entity.new_weight =
bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio); bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
/* oom_bfqq does not participate to bursts */
bfq_clear_bfqq_just_created(&bfqd->oom_bfqq);
/* /*
* Trigger weight initialization, according to ioprio, at the * Trigger weight initialization, according to ioprio, at the
* oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio * oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
...@@ -7956,6 +8326,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) ...@@ -7956,6 +8326,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
INIT_LIST_HEAD(&bfqd->active_list); INIT_LIST_HEAD(&bfqd->active_list);
INIT_LIST_HEAD(&bfqd->idle_list); INIT_LIST_HEAD(&bfqd->idle_list);
INIT_HLIST_HEAD(&bfqd->burst_list);
bfqd->hw_tag = -1; bfqd->hw_tag = -1;
...@@ -7970,6 +8341,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) ...@@ -7970,6 +8341,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->bfq_requests_within_timer = 120; bfqd->bfq_requests_within_timer = 120;
bfqd->bfq_large_burst_thresh = 8;
bfqd->bfq_burst_interval = msecs_to_jiffies(180);
bfqd->low_latency = true; bfqd->low_latency = true;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment