Commit 80af89ca authored by Andrew Morton, committed by Linus Torvalds

[PATCH] block batching fairness

From: Nick Piggin <piggin@cyberone.com.au>

This patch fixes the request batching fairness/starvation issue.  It's not
clear exactly what is going on with 2.4, but it seems to be a problem in this
same area.

Anyway, previously:

	* request queue fills up
	* process 1 calls get_request, sleeps
	* a couple of requests are freed
	* process 2 calls get_request, proceeds
	* a couple of requests are freed
	* process 2 calls get_request...

Now, as unlikely as it seems, this could be a problem.  Either way, it is a
fairness problem that process 2 can keep skipping ahead of process 1.

With the patch:

	* request queue fills up
	* any process calling get_request will sleep
	* once the queue drops below the batch watermark, processes
	  start being woken and may allocate.


This patch includes Chris Mason's fix to clear queue_full only when all tasks
have been woken.  Without it, I think starvation and unfairness could still
occur.
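
In outline, the release side now does this (a condensed sketch of the hunks in
the diff below, not a verbatim excerpt):

	if (rl->count[rw] <= batch_requests(q)) {
		if (waitqueue_active(&rl->wait[rw]))
			wake_up(&rl->wait[rw]);		/* hand the freed slot to a sleeper */
		else
			blk_clear_queue_full(q, rw);	/* no sleepers left: reopen the queue */
	}

so queue_full is only dropped once the wait queue has been drained.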

With this change to the blk-fair-batches patch, Chris is seeing much improved
numbers on 2.4: a 170 ms maximum wait versus 2700 ms without blk-fair-batches
for a dbench 90 run.  He didn't indicate how much difference his fix alone
made, but I think it is an important one.
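
As a worked example: with the common default of 128 requests per queue
(nr_requests is tunable, so treat this as illustrative), the reworked
batch_requests() below evaluates to 128 - min(128/8, 8) - 1 = 119, so wakeups
begin once the allocated-request count falls back to 119 or below.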
parent f67198fb
@@ -53,7 +53,7 @@ unsigned long blk_max_low_pfn, blk_max_pfn;
 
 static inline int batch_requests(struct request_queue *q)
 {
-	return q->nr_requests - min(q->nr_requests / 8, 8UL);
+	return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
 }
 
 /*
@@ -1309,13 +1309,16 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int gfp_mask)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *
+get_request(request_queue_t *q, int rw, int gfp_mask, int force)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] >= q->nr_requests && !elv_may_queue(q, rw)) {
+	if (rl->count[rw] == q->nr_requests)
+		blk_set_queue_full(q, rw);
+
+	if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
@@ -1330,6 +1333,14 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
+
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+			else
+				blk_clear_queue_full(q, rw);
+		}
+
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
@@ -1366,26 +1377,22 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
+	int waited = 0;
 
 	generic_unplug_device(q);
 	do {
-		rq = get_request(q, rw, GFP_NOIO);
-
-		if (!rq) {
-			struct request_list *rl = &q->rq;
+		struct request_list *rl = &q->rq;
 
-			prepare_to_wait_exclusive(&rl->wait[rw], &wait,
-					TASK_UNINTERRUPTIBLE);
-			/*
-			 * If _all_ the requests were suddenly returned then
-			 * no wakeup will be delivered.  So now we're on the
-			 * waitqueue, go check for that.
-			 */
-			rq = get_request(q, rw, GFP_NOIO);
-			if (!rq)
-				io_schedule();
-			finish_wait(&rl->wait[rw], &wait);
-		}
+		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
+				TASK_UNINTERRUPTIBLE);
+
+		rq = get_request(q, rw, GFP_NOIO, waited);
+
+		if (!rq) {
+			io_schedule();
+			waited = 1;
+		}
+		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
 
 	return rq;
@@ -1397,10 +1404,10 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 
 	BUG_ON(rw != READ && rw != WRITE);
 
-	rq = get_request(q, rw, gfp_mask);
-	if (!rq && (gfp_mask & __GFP_WAIT))
+	if (gfp_mask & __GFP_WAIT)
 		rq = get_request_wait(q, rw);
+	else
+		rq = get_request(q, rw, gfp_mask, 0);
 
 	return rq;
 }
@@ -1551,9 +1558,13 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
-		if (rl->count[rw] < batch_requests(q) &&
-				waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
+
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+			else
+				blk_clear_queue_full(q, rw);
+		}
 	}
 }
@@ -1796,7 +1807,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			freereq = NULL;
 		} else {
 			spin_unlock_irq(q->queue_lock);
-			if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+			if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) {
 				/*
 				 * READA bit set
 				 */
@@ -1904,8 +1915,7 @@ static inline void blk_partition_remap(struct bio *bio)
  * bio happens to be merged with someone else, and may change bi_dev and
  * bi_sector for remaps as it sees fit.  So the values of these fields
  * should NOT be depended on after the call to generic_make_request.
- *
- * */
+ */
 void generic_make_request(struct bio *bio)
 {
 	request_queue_t *q;
@@ -2415,6 +2425,19 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
 		clear_queue_congested(q, WRITE);
 
+	if (rl->count[READ] >= q->nr_requests) {
+		blk_set_queue_full(q, READ);
+	} else if (rl->count[READ] <= batch_requests(q)) {
+		blk_clear_queue_full(q, READ);
+		wake_up_all(&rl->wait[READ]);
+	}
+
+	if (rl->count[WRITE] >= q->nr_requests) {
+		blk_set_queue_full(q, WRITE);
+	} else if (rl->count[WRITE] <= batch_requests(q)) {
+		blk_clear_queue_full(q, WRITE);
+		wake_up_all(&rl->wait[WRITE]);
+	}
+
 	return ret;
 }
...
@@ -307,6 +307,8 @@ struct request_queue
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
+#define QUEUE_FLAG_READFULL	3	/* write queue has been filled */
+#define QUEUE_FLAG_WRITEFULL	4	/* read queue has been filled */
 
 #define blk_queue_plugged(q)	!list_empty(&(q)->plug_list)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
@@ -322,6 +324,30 @@ struct request_queue
 
 #define rq_data_dir(rq)		((rq)->flags & 1)
 
+static inline int blk_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_set_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_clear_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
 /*
  * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
  * it already be started by driver.
...