Commit 3b046a97 authored by Robert LeBlanc, committed by Shaohua Li

md/raid1: Refactor raid1_make_request

Refactor raid1_make_request so that the read and write paths live in
their own functions, to clean up the code.
Signed-off-by: Robert LeBlanc <robert@leblancnet.us>
Signed-off-by: Shaohua Li <shli@fb.com>
parent 0c744ea4
...@@ -1066,17 +1066,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) ...@@ -1066,17 +1066,107 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
kfree(plug); kfree(plug);
} }
/*
 * raid1_read_request - read path split out of raid1_make_request().
 *
 * @mddev:  the array the bio was submitted to
 * @bio:    the master (original) read bio
 * @r1_bio: r1bio already allocated and initialised by the caller to
 *          describe the whole of @bio
 *
 * Picks a mirror with read_balance(), clones @bio, trims the clone to the
 * range that mirror can serve and submits it.  When the chosen mirror can
 * serve only part of the range (max_sectors < r1_bio->sectors) the current
 * r1_bio is handed to reschedule_retry() and a fresh r1_bio is allocated
 * for the remainder before looping back to read_again.
 *
 * If read_balance() finds no usable device the r1_bio is finished via
 * raid_end_bio_io() and the function returns without submitting anything.
 */
static void raid1_make_request(struct mddev *mddev, struct bio * bio) static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct r1bio *r1_bio)
{ {
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
struct raid1_info *mirror; struct raid1_info *mirror;
struct r1bio *r1_bio;
struct bio *read_bio; struct bio *read_bio;
struct bitmap *bitmap = mddev->bitmap;
const int op = bio_op(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
int sectors_handled;
int max_sectors;
int rdisk;
/* Return value is not used on the read path (the write path keeps it). */
wait_barrier(conf, bio);
read_again:
/* read_balance() also reports, via max_sectors, how much it can serve. */
rdisk = read_balance(conf, r1_bio, &max_sectors);
if (rdisk < 0) {
/* couldn't find anywhere to read from */
raid_end_bio_io(r1_bio);
return;
}
mirror = conf->mirrors + rdisk;
if (test_bit(WriteMostly, &mirror->rdev->flags) &&
bitmap) {
/*
 * Reading from a write-mostly device must take care not to
 * over-take any writes that are 'behind'
 */
raid1_log(mddev, "wait behind writes");
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
}
r1_bio->read_disk = rdisk;
r1_bio->start_next_window = 0;
read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
/*
 * Trim the clone using the offset of this r1_bio within the master bio
 * and the max_sectors value reported by read_balance().
 */
bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
r1_bio->bios[rdisk] = read_bio;
/* Retarget the clone at the chosen mirror, shifted by its data_offset. */
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid1_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
/* MD_FAILFAST is set only if both the rdev and this r1_bio ask for it. */
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
read_bio->bi_private = r1_bio;
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
read_bio, disk_devt(mddev->gendisk),
r1_bio->sector);
if (max_sectors < r1_bio->sectors) {
/*
 * could not read all from this device, so we will need another
 * r1_bio.
 */
sectors_handled = (r1_bio->sector + max_sectors
- bio->bi_iter.bi_sector);
r1_bio->sectors = max_sectors;
/*
 * bi_phys_segments counts the r1_bios outstanding for this master
 * bio: the first split takes it from 0 to 2, each later split adds
 * one.  The update is done under conf->device_lock.
 */
spin_lock_irq(&conf->device_lock);
if (bio->bi_phys_segments == 0)
bio->bi_phys_segments = 2;
else
bio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
/*
 * Cannot call generic_make_request directly as that will be
 * queued in __make_request and subsequent mempool_alloc might
 * block waiting for it. So hand bio over to raid1d.
 */
reschedule_retry(r1_bio);
/* Fresh r1_bio covering the part of the master bio not yet handled. */
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
goto read_again;
} else
generic_make_request(read_bio);
}
static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct r1bio *r1_bio)
{
struct r1conf *conf = mddev->private;
int i, disks; int i, disks;
struct bitmap *bitmap; struct bitmap *bitmap = mddev->bitmap;
unsigned long flags; unsigned long flags;
const int op = bio_op(bio); const int op = bio_op(bio);
const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_opf & const unsigned long do_flush_fua = (bio->bi_opf &
(REQ_PREFLUSH | REQ_FUA)); (REQ_PREFLUSH | REQ_FUA));
...@@ -1096,15 +1186,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) ...@@ -1096,15 +1186,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */ md_write_start(mddev, bio); /* wait on superblock update early */
if (bio_data_dir(bio) == WRITE && if ((bio_end_sector(bio) > mddev->suspend_lo &&
((bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_iter.bi_sector < mddev->suspend_hi) || bio->bi_iter.bi_sector < mddev->suspend_hi) ||
(mddev_is_clustered(mddev) && (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE, md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio))))) { bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible /*
* wait. * As the suspend_* range is controlled by userspace, we want
* an interruptible wait.
*/ */
DEFINE_WAIT(w); DEFINE_WAIT(w);
for (;;) { for (;;) {
...@@ -1115,128 +1205,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) ...@@ -1115,128 +1205,15 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
bio->bi_iter.bi_sector >= mddev->suspend_hi || bio->bi_iter.bi_sector >= mddev->suspend_hi ||
(mddev_is_clustered(mddev) && (mddev_is_clustered(mddev) &&
!md_cluster_ops->area_resyncing(mddev, WRITE, !md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio)))) bio->bi_iter.bi_sector,
bio_end_sector(bio))))
break; break;
schedule(); schedule();
} }
finish_wait(&conf->wait_barrier, &w); finish_wait(&conf->wait_barrier, &w);
} }
start_next_window = wait_barrier(conf, bio); start_next_window = wait_barrier(conf, bio);
bitmap = mddev->bitmap;
/*
* make_request() can abort the operation when read-ahead is being
* used and no empty request is available.
*
*/
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
r1_bio->sectors = bio_sectors(bio);
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_iter.bi_sector;
/* We might need to issue multiple reads to different
* devices if there are bad blocks around, so we keep
* track of the number of reads in bio->bi_phys_segments.
* If this is 0, there is only one r1_bio and no locking
* will be needed when requests complete. If it is
* non-zero, then it is the number of not-completed requests.
*/
bio->bi_phys_segments = 0;
bio_clear_flag(bio, BIO_SEG_VALID);
if (rw == READ) {
/*
* read balancing logic:
*/
int rdisk;
read_again:
rdisk = read_balance(conf, r1_bio, &max_sectors);
if (rdisk < 0) {
/* couldn't find anywhere to read from */
raid_end_bio_io(r1_bio);
return;
}
mirror = conf->mirrors + rdisk;
if (test_bit(WriteMostly, &mirror->rdev->flags) &&
bitmap) {
/* Reading from a write-mostly device must
* take care not to over-take any writes
* that are 'behind'
*/
raid1_log(mddev, "wait behind writes");
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
}
r1_bio->read_disk = rdisk;
r1_bio->start_next_window = 0;
read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
r1_bio->bios[rdisk] = read_bio;
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid1_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
read_bio->bi_private = r1_bio;
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
read_bio, disk_devt(mddev->gendisk),
r1_bio->sector);
if (max_sectors < r1_bio->sectors) {
/* could not read all from this device, so we will
* need another r1_bio.
*/
sectors_handled = (r1_bio->sector + max_sectors
- bio->bi_iter.bi_sector);
r1_bio->sectors = max_sectors;
spin_lock_irq(&conf->device_lock);
if (bio->bi_phys_segments == 0)
bio->bi_phys_segments = 2;
else
bio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
/* Cannot call generic_make_request directly
* as that will be queued in __make_request
* and subsequent mempool_alloc might block waiting
* for it. So hand bio over to raid1d.
*/
reschedule_retry(r1_bio);
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_iter.bi_sector +
sectors_handled;
goto read_again;
} else
generic_make_request(read_bio);
return;
}
/*
* WRITE:
*/
if (conf->pending_count >= max_queued_requests) { if (conf->pending_count >= max_queued_requests) {
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
raid1_log(mddev, "wait queued"); raid1_log(mddev, "wait queued");
...@@ -1280,8 +1257,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) ...@@ -1280,8 +1257,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
int bad_sectors; int bad_sectors;
int is_bad; int is_bad;
is_bad = is_badblock(rdev, r1_bio->sector, is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
max_sectors,
&first_bad, &bad_sectors); &first_bad, &bad_sectors);
if (is_bad < 0) { if (is_bad < 0) {
/* mustn't write here until the bad block is /* mustn't write here until the bad block is
...@@ -1370,7 +1346,8 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) ...@@ -1370,7 +1346,8 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
continue; continue;
mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors); bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
max_sectors);
if (first_clone) { if (first_clone) {
/* do behind I/O ? /* do behind I/O ?
...@@ -1464,6 +1441,40 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio) ...@@ -1464,6 +1441,40 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
wake_up(&conf->wait_barrier); wake_up(&conf->wait_barrier);
} }
/*
 * raid1_make_request - entry point for a bio submitted to the array.
 *
 * @mddev: the array the bio was submitted to
 * @bio:   the incoming bio
 *
 * Allocates an r1bio from conf->r1bio_pool, initialises it to describe the
 * whole of @bio, resets the per-bio split counter and then dispatches to
 * raid1_read_request() or raid1_write_request() according to
 * bio_data_dir(bio).
 *
 * NOTE(review): the r1_bio is now allocated before either handler calls
 * wait_barrier(); the pre-refactor code allocated it afterwards.  Confirm
 * that holding a pool entry while waiting on the barrier is intended.
 */
static void raid1_make_request(struct mddev *mddev, struct bio *bio)
{
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
/*
 * make_request() can abort the operation when read-ahead is being
 * used and no empty request is available.
 * NOTE(review): comment carried over unchanged from the old code; nothing
 * in this function aborts - confirm whether it still applies.
 */
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
r1_bio->sectors = bio_sectors(bio);
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_iter.bi_sector;
/*
 * We might need to issue multiple reads to different devices if there
 * are bad blocks around, so we keep track of the number of reads in
 * bio->bi_phys_segments. If this is 0, there is only one r1_bio and
 * no locking will be needed when requests complete. If it is
 * non-zero, then it is the number of not-completed requests.
 */
bio->bi_phys_segments = 0;
bio_clear_flag(bio, BIO_SEG_VALID);
/* Hand the prepared r1_bio to the direction-specific handler. */
if (bio_data_dir(bio) == READ)
raid1_read_request(mddev, bio, r1_bio);
else
raid1_write_request(mddev, bio, r1_bio);
}
static void raid1_status(struct seq_file *seq, struct mddev *mddev) static void raid1_status(struct seq_file *seq, struct mddev *mddev)
{ {
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment