Commit c687297b authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu

md: also clone new io if io accounting is disabled

Currently, 'active_io' is grabbed before make_reqeust() is called, and
it's dropped immediately make_reqeust() returns. Hence 'active_io'
actually means io is dispatching, not io is inflight.

For raid0 and raid456 that io accounting is enabled, 'active_io' will
also be grabbed when bio is cloned for io accounting, and this 'active_io'
is dropped until io is done.

Always clone new bio so that 'active_io' will mean that io is inflight,
raid1 and raid10 will switch to use this method in later patches.

Now that bio will be cloned even if io accounting is disabled, also
rename related structure from '*_acct_*' to '*_clone_*'.
Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Reviewed-by: default avatarXiao Ni <xni@redhat.com>
Signed-off-by: default avatarSong Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230621165110.1498313-3-yukuai1@huaweicloud.com
parent c567c86b
......@@ -2306,7 +2306,7 @@ int md_integrity_register(struct mddev *mddev)
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
(mddev->level != 1 && mddev->level != 10 &&
bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
/*
* No need to handle the failure of bioset_integrity_create,
* because the function is called by md_run() -> pers->run(),
......@@ -5876,9 +5876,9 @@ int md_run(struct mddev *mddev)
goto exit_bio_set;
}
if (!bioset_initialized(&mddev->io_acct_set)) {
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
offsetof(struct md_io_acct, bio_clone), 0);
if (!bioset_initialized(&mddev->io_clone_set)) {
err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
offsetof(struct md_io_clone, bio_clone), 0);
if (err)
goto exit_sync_set;
}
......@@ -6060,7 +6060,7 @@ int md_run(struct mddev *mddev)
module_put(pers->owner);
md_bitmap_destroy(mddev);
abort:
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
bioset_exit(&mddev->sync_set);
exit_bio_set:
......@@ -6285,7 +6285,7 @@ static void __md_stop(struct mddev *mddev)
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
}
void md_stop(struct mddev *mddev)
......@@ -8651,45 +8651,44 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
static void md_end_io_acct(struct bio *bio)
static void md_end_clone_io(struct bio *bio)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
struct mddev *mddev = md_io_acct->mddev;
struct md_io_clone *md_io_clone = bio->bi_private;
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;
orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
if (md_io_clone->start_time)
bio_end_io_acct(orig_bio, md_io_clone->start_time);
bio_put(bio);
bio_endio(orig_bio);
percpu_ref_put(&mddev->active_io);
}
/*
* Used by personalities that don't already clone the bio and thus can't
* easily add the timestamp to their extended bio structure.
*/
void md_account_bio(struct mddev *mddev, struct bio **bio)
static void md_clone_bio(struct mddev *mddev, struct bio **bio)
{
struct block_device *bdev = (*bio)->bi_bdev;
struct md_io_acct *md_io_acct;
struct bio *clone;
if (!blk_queue_io_stat(bdev->bd_disk->queue))
return;
struct md_io_clone *md_io_clone;
struct bio *clone =
bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
md_io_clone->orig_bio = *bio;
md_io_clone->mddev = mddev;
if (blk_queue_io_stat(bdev->bd_disk->queue))
md_io_clone->start_time = bio_start_io_acct(*bio);
clone->bi_end_io = md_end_clone_io;
clone->bi_private = md_io_clone;
*bio = clone;
}
void md_account_bio(struct mddev *mddev, struct bio **bio)
{
percpu_ref_get(&mddev->active_io);
clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
md_io_acct->mddev = mddev;
clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
*bio = clone;
md_clone_bio(mddev, bio);
}
EXPORT_SYMBOL_GPL(md_account_bio);
......
......@@ -510,7 +510,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
struct bio_set io_clone_set;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
......@@ -736,7 +736,7 @@ struct md_thread {
void *private;
};
struct md_io_acct {
struct md_io_clone {
struct mddev *mddev;
struct bio *orig_bio;
unsigned long start_time;
......
......@@ -5468,13 +5468,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/
static void raid5_align_endio(struct bio *bi)
{
struct md_io_acct *md_io_acct = bi->bi_private;
struct bio *raid_bi = md_io_acct->orig_bio;
struct md_io_clone *md_io_clone = bi->bi_private;
struct bio *raid_bi = md_io_clone->orig_bio;
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
blk_status_t error = bi->bi_status;
unsigned long start_time = md_io_acct->start_time;
unsigned long start_time = md_io_clone->start_time;
bio_put(bi);
......@@ -5506,7 +5506,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev;
sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx;
struct md_io_acct *md_io_acct;
struct md_io_clone *md_io_clone;
bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) {
......@@ -5544,15 +5544,15 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
}
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
&mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
&mddev->io_clone_set);
md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
raid_bio->bi_next = (void *)rdev;
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
md_io_acct->start_time = bio_start_io_acct(raid_bio);
md_io_acct->orig_bio = raid_bio;
md_io_clone->start_time = bio_start_io_acct(raid_bio);
md_io_clone->orig_bio = raid_bio;
align_bio->bi_end_io = raid5_align_endio;
align_bio->bi_private = md_io_acct;
align_bio->bi_private = md_io_clone;
align_bio->bi_iter.bi_sector = sector;
/* No reshape active, so we can trust rdev->data_offset */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment