Commit 80ce63d3 authored by Jens Axboe, committed by Linus Torvalds

[PATCH] possible rq starvation on oom

I stumbled across this the other day. The block layer only uses a single
memory pool for request allocation, so it's quite possible for, e.g., writes
to have allocated all of them at any point in time. If that is the case and
the machine is low on memory, a reader attempting to allocate a request
and failing in blk_alloc_request() can get stuck for a long time, since
no one is there to wake it up.

The solution is either to add an extra mempool so that reads and writes
each have their own, or to attempt to handle the situation. I chose the
latter, to save the extra memory required for an additional mempool with
BLKDEV_MIN_RQ statically allocated requests per queue.

If a read allocation fails and we have no readers in flight for this
queue, mark us rq-starved so that the next write being freed will wake
up the sleeping reader(s). The same situation can of course happen for
writes as well; it's just a lot less likely.
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6ddb58de
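
Before reading the diff, here is a minimal user-space sketch of the handoff described above, assuming a single shared pool of slots (analogous to the single rq mempool) and using a pthread mutex/condvar pair in place of q->queue_lock and the rl->wait[] wait queues. The struct pool, pool_get() and pool_put() names are purely illustrative and are not kernel APIs.

/* Illustrative only -- not kernel code. */
#include <pthread.h>

enum { RD = 0, WR = 1 };        /* stands in for READ/WRITE */

struct pool {
        pthread_mutex_t lock;   /* stands in for q->queue_lock */
        pthread_cond_t wait[2]; /* stands in for rl->wait[] */
        int count[2];           /* requests in flight per direction */
        int starved[2];         /* direction failed with none in flight */
        int free;               /* free slots in the shared pool */
};

/* Take a slot for direction rw, sleeping until one is handed to us. */
static void pool_get(struct pool *p, int rw)
{
        pthread_mutex_lock(&p->lock);
        while (p->free == 0) {
                /*
                 * Allocation failed. If we have nothing in flight in this
                 * direction, none of our own completions will ever wake us,
                 * so mark the direction starved: the other direction will
                 * signal us when it frees a slot.
                 */
                if (p->count[rw] == 0)
                        p->starved[rw] = 1;
                pthread_cond_wait(&p->wait[rw], &p->lock);
        }
        p->free--;
        p->count[rw]++;
        p->starved[rw] = 0;     /* cleared on successful allocation */
        pthread_mutex_unlock(&p->lock);
}

/* Return a slot: wake our own direction, and the other one if starved. */
static void pool_put(struct pool *p, int rw)
{
        pthread_mutex_lock(&p->lock);
        p->free++;
        p->count[rw]--;
        pthread_cond_signal(&p->wait[rw]);
        if (p->starved[rw ^ 1])
                pthread_cond_signal(&p->wait[rw ^ 1]);
        pthread_mutex_unlock(&p->lock);
}

int main(void)
{
        static struct pool p = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .wait = { PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER },
                .free = 4,      /* stands in for q->nr_requests */
        };

        pool_get(&p, WR);       /* single-threaded smoke test */
        pool_put(&p, WR);
        return 0;
}

The point mirrored from the patch is in pool_put(): freeing a slot in one direction also wakes the other direction when that direction has marked itself starved, which is what the new rl->starved[] flags provide in the actual code below.
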
@@ -1438,6 +1438,7 @@ static int blk_init_free_list(request_queue_t *q)
        struct request_list *rl = &q->rq;
 
        rl->count[READ] = rl->count[WRITE] = 0;
+       rl->starved[READ] = rl->starved[WRITE] = 0;
        init_waitqueue_head(&rl->wait[READ]);
        init_waitqueue_head(&rl->wait[WRITE]);
        init_waitqueue_head(&rl->drain);
@@ -1618,6 +1619,22 @@ void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
        ioc->last_waited = jiffies;
 }
 
+static void __freed_request(request_queue_t *q, int rw)
+{
+       struct request_list *rl = &q->rq;
+
+       if (rl->count[rw] < queue_congestion_off_threshold(q))
+               clear_queue_congested(q, rw);
+
+       if (rl->count[rw] + 1 <= q->nr_requests) {
+               smp_mb();
+               if (waitqueue_active(&rl->wait[rw]))
+                       wake_up(&rl->wait[rw]);
+
+               blk_clear_queue_full(q, rw);
+       }
+}
+
 /*
  * A request has just been released. Account for it, update the full and
  * congestion status, wake up any waiters. Called under q->queue_lock.
@@ -1627,17 +1644,17 @@ static void freed_request(request_queue_t *q, int rw)
        struct request_list *rl = &q->rq;
 
        rl->count[rw]--;
-       if (rl->count[rw] < queue_congestion_off_threshold(q))
-               clear_queue_congested(q, rw);
-       if (rl->count[rw]+1 <= q->nr_requests) {
-               smp_mb();
-               if (waitqueue_active(&rl->wait[rw]))
-                       wake_up(&rl->wait[rw]);
-               blk_clear_queue_full(q, rw);
-       }
-       if (unlikely(waitqueue_active(&rl->drain)) &&
-           !rl->count[READ] && !rl->count[WRITE])
-               wake_up(&rl->drain);
+
+       __freed_request(q, rw);
+
+       if (unlikely(rl->starved[rw ^ 1]))
+               __freed_request(q, rw ^ 1);
+
+       if (!rl->count[READ] && !rl->count[WRITE]) {
+               smp_mb();
+               if (unlikely(waitqueue_active(&rl->drain)))
+                       wake_up(&rl->drain);
+       }
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1669,8 +1686,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 
        switch (elv_may_queue(q, rw)) {
                case ELV_MQUEUE_NO:
-                       spin_unlock_irq(q->queue_lock);
-                       goto out;
+                       goto rq_starved;
                case ELV_MQUEUE_MAY:
                        break;
                case ELV_MQUEUE_MUST:
@@ -1688,6 +1704,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 
 get_rq:
        rl->count[rw]++;
+       rl->starved[rw] = 0;
        if (rl->count[rw] >= queue_congestion_on_threshold(q))
                set_queue_congested(q, rw);
        spin_unlock_irq(q->queue_lock);
@@ -1703,6 +1720,18 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
                 */
                spin_lock_irq(q->queue_lock);
                freed_request(q, rw);
+
+               /*
+                * in the very unlikely event that allocation failed and no
+                * requests for this direction was pending, mark us starved
+                * so that freeing of a request in the other direction will
+                * notice us. another possible fix would be to split the
+                * rq mempool into READ and WRITE
+                */
+rq_starved:
+               if (unlikely(rl->count[rw] == 0))
+                       rl->starved[rw] = 1;
+
                spin_unlock_irq(q->queue_lock);
                goto out;
        }
@@ -95,6 +95,7 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
 struct request_list {
        int count[2];
+       int starved[2];
        mempool_t *rq_pool;
        wait_queue_head_t wait[2];
        wait_queue_head_t drain;