Commit 32c31806 authored by Neil Brown, committed by Linus Torvalds

[PATCH] md: assorted fixes/improvements to generic md resync code.

1/ Introduce "mddev->resync_max_sectors" so that an md personality
can ask for resync to cover a different address range than that of a
single drive.  raid10 will use this.

2/ fix is_mddev_idle so that if there seems to be a negative number
 of events, it doesn't immediately assume activity.

3/ make "sync_io" (the count of IO sectors used for array resync)
 an atomic_t to avoid SMP races. 

4/ Pass md_sync_acct a "block_device" rather than the containing "rdev",
  as the whole rdev isn't needed. Also make this an inline function.

5/ Make sure recovery gets interrupted on any error.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b60e5e71
...@@ -1648,6 +1648,8 @@ static int do_md_run(mddev_t * mddev) ...@@ -1648,6 +1648,8 @@ static int do_md_run(mddev_t * mddev)
mddev->pers = pers[pnum]; mddev->pers = pers[pnum];
spin_unlock(&pers_lock); spin_unlock(&pers_lock);
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
err = mddev->pers->run(mddev); err = mddev->pers->run(mddev);
if (err) { if (err) {
printk(KERN_ERR "md: pers->run() failed ...\n"); printk(KERN_ERR "md: pers->run() failed ...\n");
...@@ -2953,6 +2955,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -2953,6 +2955,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!mddev->pers->error_handler) if (!mddev->pers->error_handler)
return; return;
mddev->pers->error_handler(mddev,rdev); mddev->pers->error_handler(mddev,rdev);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
} }
...@@ -2985,6 +2988,10 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) ...@@ -2985,6 +2988,10 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
unsigned long max_blocks, resync, res, dt, db, rt; unsigned long max_blocks, resync, res, dt, db, rt;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_blocks = mddev->resync_max_sectors >> 1;
else
max_blocks = mddev->size; max_blocks = mddev->size;
/* /*
...@@ -3221,11 +3228,6 @@ int unregister_md_personality(int pnum) ...@@ -3221,11 +3228,6 @@ int unregister_md_personality(int pnum)
return 0; return 0;
} }
/*
 * Add nr_sectors to the sync_io counter of the disk that contains
 * rdev's block device.
 */
void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors)
{
	struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;

	disk->sync_io += nr_sectors;
}
static int is_mddev_idle(mddev_t *mddev) static int is_mddev_idle(mddev_t *mddev)
{ {
mdk_rdev_t * rdev; mdk_rdev_t * rdev;
...@@ -3238,8 +3240,12 @@ static int is_mddev_idle(mddev_t *mddev) ...@@ -3238,8 +3240,12 @@ static int is_mddev_idle(mddev_t *mddev)
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
curr_events = disk_stat_read(disk, read_sectors) + curr_events = disk_stat_read(disk, read_sectors) +
disk_stat_read(disk, write_sectors) - disk_stat_read(disk, write_sectors) -
disk->sync_io; atomic_read(&disk->sync_io);
if ((curr_events - rdev->last_events) > 32) { /* Allow some slack between the values of curr_events and last_events,
* as there are some uninteresting races.
* Note: the following is an unsigned comparison.
*/
if ((curr_events - rdev->last_events + 32) > 64) {
rdev->last_events = curr_events; rdev->last_events = curr_events;
idle = 0; idle = 0;
} }
...@@ -3373,6 +3379,13 @@ static void md_do_sync(mddev_t *mddev) ...@@ -3373,6 +3379,13 @@ static void md_do_sync(mddev_t *mddev)
} }
} while (mddev->curr_resync < 2); } while (mddev->curr_resync < 2);
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* resync follows the size requested by the personality,
* which defaults to physical size, but can be virtual size
*/
max_sectors = mddev->resync_max_sectors;
else
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1; max_sectors = mddev->size << 1;
printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
...@@ -3796,7 +3809,6 @@ module_exit(md_exit) ...@@ -3796,7 +3809,6 @@ module_exit(md_exit)
EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality); EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error); EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_sync_acct);
EXPORT_SYMBOL(md_done_sync); EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start); EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end); EXPORT_SYMBOL(md_write_end);
......
...@@ -903,7 +903,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -903,7 +903,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
atomic_inc(&conf->mirrors[i].rdev->nr_pending); atomic_inc(&conf->mirrors[i].rdev->nr_pending);
atomic_inc(&r1_bio->remaining); atomic_inc(&r1_bio->remaining);
md_sync_acct(conf->mirrors[i].rdev, wbio->bi_size >> 9); md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
generic_make_request(wbio); generic_make_request(wbio);
} }
...@@ -1143,7 +1143,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) ...@@ -1143,7 +1143,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
bio = r1_bio->bios[disk]; bio = r1_bio->bios[disk];
r1_bio->sectors = nr_sectors; r1_bio->sectors = nr_sectors;
md_sync_acct(mirror->rdev, nr_sectors); md_sync_acct(mirror->rdev->bdev, nr_sectors);
generic_make_request(bio); generic_make_request(bio);
......
...@@ -1071,7 +1071,8 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1071,7 +1071,8 @@ static void handle_stripe(struct stripe_head *sh)
PRINTK("Reading block %d (sync=%d)\n", PRINTK("Reading block %d (sync=%d)\n",
i, syncing); i, syncing);
if (syncing) if (syncing)
md_sync_acct(conf->disks[i].rdev, STRIPE_SECTORS); md_sync_acct(conf->disks[i].rdev->bdev,
STRIPE_SECTORS);
} }
} }
} }
...@@ -1256,7 +1257,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1256,7 +1257,7 @@ static void handle_stripe(struct stripe_head *sh)
if (rdev) { if (rdev) {
if (test_bit(R5_Syncio, &sh->dev[i].flags)) if (test_bit(R5_Syncio, &sh->dev[i].flags))
md_sync_acct(rdev, STRIPE_SECTORS); md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev; bi->bi_bdev = rdev->bdev;
PRINTK("for %llu schedule op %ld on disc %d\n", PRINTK("for %llu schedule op %ld on disc %d\n",
......
...@@ -1208,7 +1208,8 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1208,7 +1208,8 @@ static void handle_stripe(struct stripe_head *sh)
PRINTK("Reading block %d (sync=%d)\n", PRINTK("Reading block %d (sync=%d)\n",
i, syncing); i, syncing);
if (syncing) if (syncing)
md_sync_acct(conf->disks[i].rdev, STRIPE_SECTORS); md_sync_acct(conf->disks[i].rdev->bdev,
STRIPE_SECTORS);
} }
} }
} }
...@@ -1418,7 +1419,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1418,7 +1419,7 @@ static void handle_stripe(struct stripe_head *sh)
if (rdev) { if (rdev) {
if (test_bit(R5_Syncio, &sh->dev[i].flags)) if (test_bit(R5_Syncio, &sh->dev[i].flags))
md_sync_acct(rdev, STRIPE_SECTORS); md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev; bi->bi_bdev = rdev->bdev;
PRINTK("for %llu schedule op %ld on disc %d\n", PRINTK("for %llu schedule op %ld on disc %d\n",
......
...@@ -100,7 +100,7 @@ struct gendisk { ...@@ -100,7 +100,7 @@ struct gendisk {
struct timer_rand_state *random; struct timer_rand_state *random;
int policy; int policy;
unsigned sync_io; /* RAID */ atomic_t sync_io; /* RAID */
unsigned long stamp, stamp_idle; unsigned long stamp, stamp_idle;
int in_flight; int in_flight;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -74,7 +74,6 @@ extern void md_write_start(mddev_t *mddev); ...@@ -74,7 +74,6 @@ extern void md_write_start(mddev_t *mddev);
extern void md_write_end(mddev_t *mddev); extern void md_write_end(mddev_t *mddev);
extern void md_handle_safemode(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors);
extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev); extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
extern void md_unplug_mddev(mddev_t *mddev); extern void md_unplug_mddev(mddev_t *mddev);
......
...@@ -216,6 +216,7 @@ struct mddev_s ...@@ -216,6 +216,7 @@ struct mddev_s
unsigned long resync_mark; /* a recent timestamp */ unsigned long resync_mark; /* a recent timestamp */
sector_t resync_mark_cnt;/* blocks written at resync_mark */ sector_t resync_mark_cnt;/* blocks written at resync_mark */
sector_t resync_max_sectors; /* may be set by personality */
/* recovery/resync flags /* recovery/resync flags
* NEEDED: we might need to start a resync/recover * NEEDED: we might need to start a resync/recover
* RUNNING: a thread is running, or about to be started * RUNNING: a thread is running, or about to be started
...@@ -263,6 +264,11 @@ static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev) ...@@ -263,6 +264,11 @@ static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
} }
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}
struct mdk_personality_s struct mdk_personality_s
{ {
char *name; char *name;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment