Commit 841c1316 authored by Ming Lei, committed by Shaohua Li

md: raid1: improve write behind

This patch improves the handling of write behind in the following ways:

- introduce a behind master bio to hold all write-behind pages
- fast-clone bios from the behind master bio
- avoid changing the bvec table directly
- use bio_copy_data() and make the code cleaner
Suggested-by: Shaohua Li <shli@fb.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Shaohua Li <shli@fb.com>
parent d8c84c4f
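
For orientation before the diff: the old code kept a bare bio_vec array (behind_bvecs) and copied pages into it segment by segment; the new code allocates one "behind master bio" that owns private copies of the write-behind pages, and every per-device write bio becomes a cheap fast clone of that master. A minimal sketch of the resulting flow, condensed from the hunks below (behind_write_bio is a hypothetical wrapper for illustration; alloc_behind_master_bio, bio_clone_fast, and the mddev bio_set are the real names used in the diff; error handling elided):

	/* Sketch only: wire-up of write behind after this patch. */
	static struct bio *behind_write_bio(struct r1bio *r1_bio, struct bio *bio,
					    int offset_sectors, int max_sectors)
	{
		/* One master bio owns private copies of all behind pages. */
		struct bio *master = alloc_behind_master_bio(r1_bio, bio,
							     offset_sectors << 9,
							     max_sectors << 9);

		/* Each mirror writes through a fast clone that shares the
		 * master's bvec table; no per-device page copying. */
		return bio_clone_fast(master, GFP_NOIO, r1_bio->mddev->bio_set);
	}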
drivers/md/raid1.c

@@ -388,12 +388,9 @@ static void close_write(struct r1bio *r1_bio)
 {
 	/* it really is the end of this request */
 	if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-		/* free extra copy of the data pages */
-		int i = r1_bio->behind_page_count;
-		while (i--)
-			safe_put_page(r1_bio->behind_bvecs[i].bv_page);
-		kfree(r1_bio->behind_bvecs);
-		r1_bio->behind_bvecs = NULL;
+		bio_free_pages(r1_bio->behind_master_bio);
+		bio_put(r1_bio->behind_master_bio);
+		r1_bio->behind_master_bio = NULL;
 	}
 	/* clear the bitmap if all writes complete successfully */
 	bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
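
The six-line teardown above collapses into two calls because the pages are now tracked by the master bio itself. For reference, bio_free_pages() in 4.12-era block/bio.c is roughly the loop the old code spelled out by hand:

	/* Roughly bio_free_pages(): free each data page exactly once. */
	void bio_free_pages(struct bio *bio)
	{
		struct bio_vec *bvec;
		int i;

		bio_for_each_segment_all(bvec, bio, i)
			__free_page(bvec->bv_page);
	}

The order matters: the pages are freed first, while the master bio still describes them, and only then does bio_put() drop the bio itself.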
@@ -495,6 +492,10 @@ static void raid1_end_write_request(struct bio *bio)
 	}
 
 	if (behind) {
+		/* we release behind master bio when all write are done */
+		if (r1_bio->behind_master_bio == bio)
+			to_put = NULL;
+
 		if (test_bit(WriteMostly, &rdev->flags))
 			atomic_dec(&r1_bio->behind_remaining);
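
This guard is needed because the master bio is itself submitted as one of the per-device writes (see the raid1_write_request hunk below), so its final reference must be dropped by close_write(), not by the per-write completion path. A hypothetical trace of the double-put the guard prevents:

	/* Without the guard (names from the diff):
	 *
	 *   raid1_end_write_request(master) -> bio_put(master)  // last ref gone
	 *   close_write(r1_bio)             -> bio_put(master)  // use-after-free
	 *
	 * With to_put = NULL for bio == r1_bio->behind_master_bio, the final
	 * bio_put() (and bio_free_pages()) happen once, in close_write().
	 */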
@@ -1089,39 +1090,46 @@ static void unfreeze_array(struct r1conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
-/* duplicate the data pages for behind I/O
- */
-static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
+static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio,
+					   struct bio *bio,
+					   int offset, int size)
 {
-	int i;
-	struct bio_vec *bvec;
-	struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
-					GFP_NOIO);
-	if (unlikely(!bvecs))
-		return;
+	unsigned vcnt = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	int i = 0;
+	struct bio *behind_bio = NULL;
+
+	behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
+	if (!behind_bio)
+		goto fail;
+
+	while (i < vcnt && size) {
+		struct page *page;
+		int len = min_t(int, PAGE_SIZE, size);
+
+		page = alloc_page(GFP_NOIO);
+		if (unlikely(!page))
+			goto free_pages;
+
+		bio_add_page(behind_bio, page, len, 0);
+
+		size -= len;
+		i++;
+	}
 
-	bio_for_each_segment_all(bvec, bio, i) {
-		bvecs[i] = *bvec;
-		bvecs[i].bv_page = alloc_page(GFP_NOIO);
-		if (unlikely(!bvecs[i].bv_page))
-			goto do_sync_io;
-		memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
-		       kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-		kunmap(bvecs[i].bv_page);
-		kunmap(bvec->bv_page);
-	}
-	r1_bio->behind_bvecs = bvecs;
-	r1_bio->behind_page_count = bio->bi_vcnt;
+	bio_copy_data_partial(behind_bio, bio, offset,
+			      behind_bio->bi_iter.bi_size);
+
+	r1_bio->behind_master_bio = behind_bio;;
 	set_bit(R1BIO_BehindIO, &r1_bio->state);
-	return;
 
-do_sync_io:
-	for (i = 0; i < bio->bi_vcnt; i++)
-		if (bvecs[i].bv_page)
-			put_page(bvecs[i].bv_page);
-	kfree(bvecs);
+	return behind_bio;
+
+free_pages:
 	pr_debug("%dB behind alloc failed, doing sync I/O\n",
 		 bio->bi_iter.bi_size);
+	bio_free_pages(behind_bio);
+fail:
+	return behind_bio;
 }
 
 struct raid1_plug_cb {
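
A worked example of the sizing logic above, with hypothetical numbers:

	/* A 9 KiB write-behind region on a system with 4 KiB pages:
	 *
	 *   size = 9216, PAGE_SIZE = 4096
	 *   vcnt = (9216 + 4096 - 1) >> PAGE_SHIFT = 3
	 *
	 * The while loop adds segments of 4096, 4096 and 1024 bytes, so
	 * behind_bio->bi_iter.bi_size ends up as 9216 and
	 * bio_copy_data_partial(behind_bio, bio, offset, 9216) copies exactly
	 * the trimmed region out of the source bio, replacing the old
	 * per-bvec kmap()/memcpy()/kunmap() sequence.
	 */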
@@ -1457,11 +1465,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		    (atomic_read(&bitmap->behind_writes)
 		     < mddev->bitmap_info.max_write_behind) &&
 		    !waitqueue_active(&bitmap->behind_wait)) {
-			mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-							mddev->bio_set,
-							offset << 9,
-							max_sectors << 9);
-			alloc_behind_pages(mbio, r1_bio);
+			mbio = alloc_behind_master_bio(r1_bio, bio,
+						       offset << 9,
+						       max_sectors << 9);
 		}
 
 		bitmap_startwrite(bitmap, r1_bio->sector,
@@ -1472,26 +1478,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio)
 		}
 
 		if (!mbio) {
-			if (r1_bio->behind_bvecs)
-				mbio = bio_clone_bioset_partial(bio, GFP_NOIO,
-								mddev->bio_set,
-								offset << 9,
-								max_sectors << 9);
+			if (r1_bio->behind_master_bio)
+				mbio = bio_clone_fast(r1_bio->behind_master_bio,
+						      GFP_NOIO,
+						      mddev->bio_set);
 			else {
 				mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
 				bio_trim(mbio, offset, max_sectors);
 			}
 		}
 
-		if (r1_bio->behind_bvecs) {
-			struct bio_vec *bvec;
-			int j;
-
-			/*
-			 * We trimmed the bio, so _all is legit
-			 */
-			bio_for_each_segment_all(bvec, mbio, j)
-				bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
+		if (r1_bio->behind_master_bio) {
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 				atomic_inc(&r1_bio->behind_remaining);
 		}
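
The page-pointer rewrite loop can go because a fast clone shares the source's bvec table instead of copying it, so a clone of behind_master_bio already points at the privately copied pages. Roughly, __bio_clone_fast() in block/bio.c of this era does:

	/* Core of __bio_clone_fast(): the clone borrows, not copies. */
	bio->bi_io_vec = bio_src->bi_io_vec;	/* shared bvec table */
	bio->bi_iter = bio_src->bi_iter;	/* same position/size */

which is also why the master bio must stay alive until every clone has completed (see close_write() above).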
@@ -2386,18 +2383,11 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
 		/* Write at 'sector' for 'sectors'*/
 
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-			unsigned vcnt = r1_bio->behind_page_count;
-			struct bio_vec *vec = r1_bio->behind_bvecs;
-
-			while (!vec->bv_page) {
-				vec++;
-				vcnt--;
-			}
-
-			wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
-			memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
-
-			wbio->bi_vcnt = vcnt;
+			wbio = bio_clone_fast(r1_bio->behind_master_bio,
+					      GFP_NOIO,
+					      mddev->bio_set);
+			/* We really need a _all clone */
+			wbio->bi_iter = (struct bvec_iter){ 0 };
 		} else {
 			wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
 					      mddev->bio_set);
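
On the zeroed iterator: bio_clone_fast() copies the source's current bvec_iter, but narrow_write_error() needs a clone that walks the payload from the first segment, which is what the "_all clone" comment asks for. A short annotated sketch of the intent:

	/* wbio inherits the master's bi_iter from the fast clone.
	 * Zeroing it rewinds the clone to bvec 0 with an empty size;
	 * the unchanged code just after this hunk (not shown here) then
	 * restores bi_iter.bi_sector and bi_iter.bi_size for the region
	 * being rewritten, before bio_trim() narrows the clone to the
	 * failed sectors only.
	 */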
drivers/md/raid1.h

@@ -153,9 +153,13 @@ struct r1bio {
 	int			read_disk;
 
 	struct list_head	retry_list;
-	/* Next two are only valid when R1BIO_BehindIO is set */
-	struct bio_vec		*behind_bvecs;
-	int			behind_page_count;
+
+	/*
+	 * When R1BIO_BehindIO is set, we store pages for write behind
+	 * in behind_master_bio.
+	 */
+	struct bio		*behind_master_bio;
+
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.