Commit 34db0cd6 authored by NeilBrown

md: add proper write-congestion reporting to RAID1 and RAID10.

RAID1 and RAID10 handle write requests by queuing them for handling by
a separate thread.  This is because when a write-intent-bitmap is
active we might need to update the bitmap first, so it is good to
queue a lot of writes, then do one big bitmap update for them all.

However, writeback requires devices to appear to be congested after a
while so that it can make some guesstimate of throughput.  The infinite
queue defeats that (note that RAID5 already has a finite queue, so
it doesn't suffer from this problem).

So impose a limit on the number of pending write requests.  By default
it is 1024 which seems to be generally suitable.  Make it configurable
via module option just in case someone finds a regression.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 84fc4b56
...@@ -45,6 +45,11 @@ ...@@ -45,6 +45,11 @@
*/ */
#define NR_RAID1_BIOS 256 #define NR_RAID1_BIOS 256
/* When there are this many requests queued to be written by
* the raid1 thread, we become 'congested' to provide back-pressure
* for writeback.
*/
static int max_queued_requests = 1024;
static void allow_barrier(struct r1conf *conf); static void allow_barrier(struct r1conf *conf);
static void lower_barrier(struct r1conf *conf); static void lower_barrier(struct r1conf *conf);
...@@ -598,6 +603,10 @@ int md_raid1_congested(struct mddev *mddev, int bits) ...@@ -598,6 +603,10 @@ int md_raid1_congested(struct mddev *mddev, int bits)
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
int i, ret = 0; int i, ret = 0;
if ((bits & (1 << BDI_async_congested)) &&
conf->pending_count >= max_queued_requests)
return 1;
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < mddev->raid_disks; i++) { for (i = 0; i < mddev->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
...@@ -638,10 +647,12 @@ static void flush_pending_writes(struct r1conf *conf) ...@@ -638,10 +647,12 @@ static void flush_pending_writes(struct r1conf *conf)
if (conf->pending_bio_list.head) { if (conf->pending_bio_list.head) {
struct bio *bio; struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list); bio = bio_list_get(&conf->pending_bio_list);
conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to /* flush any pending bitmap writes to
* disk before proceeding w/ I/O */ * disk before proceeding w/ I/O */
bitmap_unplug(conf->mddev->bitmap); bitmap_unplug(conf->mddev->bitmap);
wake_up(&conf->wait_barrier);
while (bio) { /* submit pending writes */ while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next; struct bio *next = bio->bi_next;
...@@ -945,6 +956,11 @@ static int make_request(struct mddev *mddev, struct bio * bio) ...@@ -945,6 +956,11 @@ static int make_request(struct mddev *mddev, struct bio * bio)
/* /*
* WRITE: * WRITE:
*/ */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
conf->pending_count < max_queued_requests);
}
/* first select target devices under rcu_lock and /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting * inc refcount on their rdev. Record them by setting
* bios[x] to bio * bios[x] to bio
...@@ -1108,6 +1124,7 @@ static int make_request(struct mddev *mddev, struct bio * bio) ...@@ -1108,6 +1124,7 @@ static int make_request(struct mddev *mddev, struct bio * bio)
atomic_inc(&r1_bio->remaining); atomic_inc(&r1_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio); bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
/* Mustn't call r1_bio_write_done before this next test, /* Mustn't call r1_bio_write_done before this next test,
...@@ -2418,6 +2435,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) ...@@ -2418,6 +2435,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
init_waitqueue_head(&conf->wait_barrier); init_waitqueue_head(&conf->wait_barrier);
bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->pending_bio_list);
conf->pending_count = 0;
conf->last_used = -1; conf->last_used = -1;
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
...@@ -2776,3 +2794,5 @@ MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD"); ...@@ -2776,3 +2794,5 @@ MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
MODULE_ALIAS("md-personality-3"); /* RAID1 */ MODULE_ALIAS("md-personality-3"); /* RAID1 */
MODULE_ALIAS("md-raid1"); MODULE_ALIAS("md-raid1");
MODULE_ALIAS("md-level-1"); MODULE_ALIAS("md-level-1");
module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
...@@ -46,6 +46,7 @@ struct r1conf { ...@@ -46,6 +46,7 @@ struct r1conf {
/* queue pending writes to be submitted on unplug */ /* queue pending writes to be submitted on unplug */
struct bio_list pending_bio_list; struct bio_list pending_bio_list;
int pending_count;
/* for use when syncing mirrors: /* for use when syncing mirrors:
* We don't allow both normal IO and resync/recovery IO at * We don't allow both normal IO and resync/recovery IO at
......
...@@ -58,6 +58,12 @@ ...@@ -58,6 +58,12 @@
*/ */
#define NR_RAID10_BIOS 256 #define NR_RAID10_BIOS 256
/* When there are this many requests queued to be written by
* the raid10 thread, we become 'congested' to provide back-pressure
* for writeback.
*/
static int max_queued_requests = 1024;
static void allow_barrier(struct r10conf *conf); static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf); static void lower_barrier(struct r10conf *conf);
...@@ -681,6 +687,10 @@ static int raid10_congested(void *data, int bits) ...@@ -681,6 +687,10 @@ static int raid10_congested(void *data, int bits)
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
int i, ret = 0; int i, ret = 0;
if ((bits & (1 << BDI_async_congested)) &&
conf->pending_count >= max_queued_requests)
return 1;
if (mddev_congested(mddev, bits)) if (mddev_congested(mddev, bits))
return 1; return 1;
rcu_read_lock(); rcu_read_lock();
...@@ -706,10 +716,12 @@ static void flush_pending_writes(struct r10conf *conf) ...@@ -706,10 +716,12 @@ static void flush_pending_writes(struct r10conf *conf)
if (conf->pending_bio_list.head) { if (conf->pending_bio_list.head) {
struct bio *bio; struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list); bio = bio_list_get(&conf->pending_bio_list);
conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to disk /* flush any pending bitmap writes to disk
* before proceeding w/ I/O */ * before proceeding w/ I/O */
bitmap_unplug(conf->mddev->bitmap); bitmap_unplug(conf->mddev->bitmap);
wake_up(&conf->wait_barrier);
while (bio) { /* submit pending writes */ while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next; struct bio *next = bio->bi_next;
...@@ -996,6 +1008,11 @@ static int make_request(struct mddev *mddev, struct bio * bio) ...@@ -996,6 +1008,11 @@ static int make_request(struct mddev *mddev, struct bio * bio)
/* /*
* WRITE: * WRITE:
*/ */
if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread);
wait_event(conf->wait_barrier,
conf->pending_count < max_queued_requests);
}
/* first select target devices under rcu_lock and /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting * inc refcount on their rdev. Record them by setting
* bios[x] to bio * bios[x] to bio
...@@ -1129,6 +1146,7 @@ static int make_request(struct mddev *mddev, struct bio * bio) ...@@ -1129,6 +1146,7 @@ static int make_request(struct mddev *mddev, struct bio * bio)
atomic_inc(&r10_bio->remaining); atomic_inc(&r10_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio); bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
...@@ -3086,3 +3104,5 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); ...@@ -3086,3 +3104,5 @@ MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
MODULE_ALIAS("md-personality-9"); /* RAID10 */ MODULE_ALIAS("md-personality-9"); /* RAID10 */
MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-raid10");
MODULE_ALIAS("md-level-10"); MODULE_ALIAS("md-level-10");
module_param(max_queued_requests, int, S_IRUGO|S_IWUSR);
...@@ -42,7 +42,7 @@ struct r10conf { ...@@ -42,7 +42,7 @@ struct r10conf {
struct list_head retry_list; struct list_head retry_list;
/* queue pending writes and submit them on unplug */ /* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list; struct bio_list pending_bio_list;
int pending_count;
spinlock_t resync_lock; spinlock_t resync_lock;
int nr_pending; int nr_pending;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment