Commit be4d3280, authored by Shaohua Li, committed by NeilBrown

md/raid1: make sequential read detection per disk based

Currently, sequential read detection is global (array-wide). It is natural to
make it per-disk, which improves detection when there are multiple concurrent
sequential reads. The next patch will make SSD read balancing not use the
distance-based algorithm, where this change helps detect truly sequential
reads for SSDs.
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent cc4d1efd
...@@ -497,9 +497,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -497,9 +497,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
const sector_t this_sector = r1_bio->sector; const sector_t this_sector = r1_bio->sector;
int sectors; int sectors;
int best_good_sectors; int best_good_sectors;
int start_disk;
int best_disk; int best_disk;
int i; int disk;
sector_t best_dist; sector_t best_dist;
struct md_rdev *rdev; struct md_rdev *rdev;
int choose_first; int choose_first;
...@@ -517,23 +516,16 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -517,23 +516,16 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
best_good_sectors = 0; best_good_sectors = 0;
if (conf->mddev->recovery_cp < MaxSector && if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) { (this_sector + sectors >= conf->next_resync))
choose_first = 1; choose_first = 1;
start_disk = 0; else
} else {
choose_first = 0; choose_first = 0;
start_disk = conf->last_used;
}
for (i = 0 ; i < conf->raid_disks * 2 ; i++) { for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
sector_t dist; sector_t dist;
sector_t first_bad; sector_t first_bad;
int bad_sectors; int bad_sectors;
int disk = start_disk + i;
if (disk >= conf->raid_disks * 2)
disk -= conf->raid_disks * 2;
rdev = rcu_dereference(conf->mirrors[disk].rdev); rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL || rdev == NULL
...@@ -594,7 +586,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -594,7 +586,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
dist = abs(this_sector - conf->mirrors[disk].head_position); dist = abs(this_sector - conf->mirrors[disk].head_position);
if (choose_first if (choose_first
/* Don't change to another disk for sequential reads */ /* Don't change to another disk for sequential reads */
|| conf->next_seq_sect == this_sector || conf->mirrors[disk].next_seq_sect == this_sector
|| dist == 0 || dist == 0
/* If device is idle, use it */ /* If device is idle, use it */
|| atomic_read(&rdev->nr_pending) == 0) { || atomic_read(&rdev->nr_pending) == 0) {
...@@ -620,8 +612,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -620,8 +612,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
goto retry; goto retry;
} }
sectors = best_good_sectors; sectors = best_good_sectors;
conf->next_seq_sect = this_sector + sectors; conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
conf->last_used = best_disk;
} }
rcu_read_unlock(); rcu_read_unlock();
*max_sectors = sectors; *max_sectors = sectors;
...@@ -2599,7 +2590,6 @@ static struct r1conf *setup_conf(struct mddev *mddev) ...@@ -2599,7 +2590,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->recovery_disabled = mddev->recovery_disabled - 1; conf->recovery_disabled = mddev->recovery_disabled - 1;
err = -EIO; err = -EIO;
conf->last_used = -1;
for (i = 0; i < conf->raid_disks * 2; i++) { for (i = 0; i < conf->raid_disks * 2; i++) {
disk = conf->mirrors + i; disk = conf->mirrors + i;
...@@ -2625,19 +2615,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) ...@@ -2625,19 +2615,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (disk->rdev && if (disk->rdev &&
(disk->rdev->saved_raid_disk < 0)) (disk->rdev->saved_raid_disk < 0))
conf->fullsync = 1; conf->fullsync = 1;
} else if (conf->last_used < 0) }
/*
* The first working device is used as a
* starting point to read balancing.
*/
conf->last_used = i;
} }
if (conf->last_used < 0) {
printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
mdname(mddev));
goto abort;
}
err = -ENOMEM; err = -ENOMEM;
conf->thread = md_register_thread(raid1d, mddev, "raid1"); conf->thread = md_register_thread(raid1d, mddev, "raid1");
if (!conf->thread) { if (!conf->thread) {
...@@ -2894,7 +2874,6 @@ static int raid1_reshape(struct mddev *mddev) ...@@ -2894,7 +2874,6 @@ static int raid1_reshape(struct mddev *mddev)
conf->raid_disks = mddev->raid_disks = raid_disks; conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0; mddev->delta_disks = 0;
conf->last_used = 0; /* just make sure it is in-range */
lower_barrier(conf); lower_barrier(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
......
...@@ -4,6 +4,11 @@ ...@@ -4,6 +4,11 @@
struct raid1_info { struct raid1_info {
struct md_rdev *rdev; struct md_rdev *rdev;
sector_t head_position; sector_t head_position;
/* When choose the best device for a read (read_balance())
* we try to keep sequential reads one the same device
*/
sector_t next_seq_sect;
}; };
/* /*
...@@ -29,12 +34,6 @@ struct r1conf { ...@@ -29,12 +34,6 @@ struct r1conf {
*/ */
int raid_disks; int raid_disks;
/* When choose the best device for a read (read_balance())
* we try to keep sequential reads one the same device
* using 'last_used' and 'next_seq_sect'
*/
int last_used;
sector_t next_seq_sect;
/* During resync, read_balancing is only allowed on the part /* During resync, read_balancing is only allowed on the part
* of the array that has been resynced. 'next_resync' tells us * of the array that has been resynced. 'next_resync' tells us
* where that is. * where that is.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment