Commit 36bbffc0 authored by Linus Torvalds

Merge tag 'md-3.4-fixes' of git://neil.brown.name/md

Pull assorted md fixes from Neil Brown:
 - some RAID levels didn't clean up properly if md_integrity_register()
   failed
 - a 'check' of RAID5/RAID6 doesn't actually read any data since a
   recent patch - so fix that (and mark for -stable)
 - a couple of other minor bugs.

* tag 'md-3.4-fixes' of git://neil.brown.name/md:
  md/raid1,raid10: don't compare excess byte during consistency check.
  md/raid5: Fix a bug about judging if the operation is syncing or replacing
  md/raid1:Remove unnecessary rcu_dereference(conf->mirrors[i].rdev).
  md: Avoid OOPS when reshaping raid1 to raid0
  md/raid5: fix handling of bad blocks during recovery.
  md/raid1: If md_integrity_register() failed,run() must free the mem
  md/raid0: If md_integrity_register() fails, raid0_run() must free the mem.
  md/linear: If md_integrity_register() fails, linear_run() must free the mem.
parents 20a2a811 5020ad7d
...@@ -198,6 +198,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) ...@@ -198,6 +198,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
static int linear_run (struct mddev *mddev) static int linear_run (struct mddev *mddev)
{ {
struct linear_conf *conf; struct linear_conf *conf;
int ret;
if (md_check_no_bitmap(mddev)) if (md_check_no_bitmap(mddev))
return -EINVAL; return -EINVAL;
...@@ -211,7 +212,13 @@ static int linear_run (struct mddev *mddev) ...@@ -211,7 +212,13 @@ static int linear_run (struct mddev *mddev)
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->backing_dev_info.congested_fn = linear_congested; mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_data = mddev;
return md_integrity_register(mddev);
ret = md_integrity_register(mddev);
if (ret) {
kfree(conf);
mddev->private = NULL;
}
return ret;
} }
static int linear_add(struct mddev *mddev, struct md_rdev *rdev) static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
......
...@@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks ...@@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors; return array_sectors;
} }
static int raid0_stop(struct mddev *mddev);
static int raid0_run(struct mddev *mddev) static int raid0_run(struct mddev *mddev)
{ {
struct r0conf *conf; struct r0conf *conf;
...@@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev) ...@@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev)
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
dump_zones(mddev); dump_zones(mddev);
return md_integrity_register(mddev);
ret = md_integrity_register(mddev);
if (ret)
raid0_stop(mddev);
return ret;
} }
static int raid0_stop(struct mddev *mddev) static int raid0_stop(struct mddev *mddev)
...@@ -625,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev) ...@@ -625,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
static void *raid0_takeover_raid1(struct mddev *mddev) static void *raid0_takeover_raid1(struct mddev *mddev)
{ {
struct r0conf *priv_conf; struct r0conf *priv_conf;
int chunksect;
/* Check layout: /* Check layout:
* - (N - 1) mirror drives must be already faulty * - (N - 1) mirror drives must be already faulty
...@@ -635,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev) ...@@ -635,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
/*
* a raid1 doesn't have the notion of chunk size, so
* figure out the largest suitable size we can use.
*/
chunksect = 64 * 2; /* 64K by default */
/* The array must be an exact multiple of chunksize */
while (chunksect && (mddev->array_sectors & (chunksect - 1)))
chunksect >>= 1;
if ((chunksect << 9) < PAGE_SIZE)
/* array size does not allow a suitable chunk size */
return ERR_PTR(-EINVAL);
/* Set new parameters */ /* Set new parameters */
mddev->new_level = 0; mddev->new_level = 0;
mddev->new_layout = 0; mddev->new_layout = 0;
mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ mddev->new_chunk_sectors = chunksect;
mddev->chunk_sectors = chunksect;
mddev->delta_disks = 1 - mddev->raid_disks; mddev->delta_disks = 1 - mddev->raid_disks;
mddev->raid_disks = 1; mddev->raid_disks = 1;
/* make sure it will be not marked as dirty */ /* make sure it will be not marked as dirty */
......
...@@ -1738,7 +1738,7 @@ static int process_checks(struct r1bio *r1_bio) ...@@ -1738,7 +1738,7 @@ static int process_checks(struct r1bio *r1_bio)
s = sbio->bi_io_vec[j].bv_page; s = sbio->bi_io_vec[j].bv_page;
if (memcmp(page_address(p), if (memcmp(page_address(p),
page_address(s), page_address(s),
PAGE_SIZE)) sbio->bi_io_vec[j].bv_len))
break; break;
} }
} else } else
...@@ -2386,8 +2386,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp ...@@ -2386,8 +2386,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
int ok = 1; int ok = 1;
for (i = 0 ; i < conf->raid_disks * 2 ; i++) for (i = 0 ; i < conf->raid_disks * 2 ; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_write) { if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
struct md_rdev *rdev = struct md_rdev *rdev = conf->mirrors[i].rdev;
rcu_dereference(conf->mirrors[i].rdev);
ok = rdev_set_badblocks(rdev, sector_nr, ok = rdev_set_badblocks(rdev, sector_nr,
min_bad, 0 min_bad, 0
) && ok; ) && ok;
...@@ -2636,11 +2635,13 @@ static struct r1conf *setup_conf(struct mddev *mddev) ...@@ -2636,11 +2635,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err); return ERR_PTR(err);
} }
static int stop(struct mddev *mddev);
static int run(struct mddev *mddev) static int run(struct mddev *mddev)
{ {
struct r1conf *conf; struct r1conf *conf;
int i; int i;
struct md_rdev *rdev; struct md_rdev *rdev;
int ret;
if (mddev->level != 1) { if (mddev->level != 1) {
printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
...@@ -2705,7 +2706,11 @@ static int run(struct mddev *mddev) ...@@ -2705,7 +2706,11 @@ static int run(struct mddev *mddev)
mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_data = mddev;
blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
} }
return md_integrity_register(mddev);
ret = md_integrity_register(mddev);
if (ret)
stop(mddev);
return ret;
} }
static int stop(struct mddev *mddev) static int stop(struct mddev *mddev)
......
...@@ -1821,7 +1821,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1821,7 +1821,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
for (j = 0; j < vcnt; j++) for (j = 0; j < vcnt; j++)
if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
page_address(tbio->bi_io_vec[j].bv_page), page_address(tbio->bi_io_vec[j].bv_page),
PAGE_SIZE)) fbio->bi_io_vec[j].bv_len))
break; break;
if (j == vcnt) if (j == vcnt)
continue; continue;
......
...@@ -2471,18 +2471,19 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -2471,18 +2471,19 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0; int abort = 0;
int i; int i;
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0; s->syncing = 0;
s->replacing = 0; s->replacing = 0;
/* There is nothing more to do for sync/check/repair. /* There is nothing more to do for sync/check/repair.
* Don't even need to abort as that is handled elsewhere
* if needed, and not always wanted e.g. if there is a known
* bad block here.
* For recover/replace we need to record a bad block on all * For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery * non-sync devices, or abort the recovery
*/ */
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
return; /* During recovery devices cannot be removed, so
/* During recovery devices cannot be removed, so locking and * locking and refcounting of rdevs is not needed
* refcounting of rdevs is not needed
*/ */
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = conf->disks[i].rdev; struct md_rdev *rdev = conf->disks[i].rdev;
...@@ -2500,10 +2501,11 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, ...@@ -2500,10 +2501,11 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
STRIPE_SECTORS, 0)) STRIPE_SECTORS, 0))
abort = 1; abort = 1;
} }
if (abort) { if (abort)
conf->recovery_disabled = conf->mddev->recovery_disabled; conf->recovery_disabled =
set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); conf->mddev->recovery_disabled;
} }
md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
} }
static int want_replace(struct stripe_head *sh, int disk_idx) static int want_replace(struct stripe_head *sh, int disk_idx)
...@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */; /* Not in-sync */;
else if (is_bad) { else if (is_bad) {
/* also not in-sync */ /* also not in-sync */
if (!test_bit(WriteErrorSeen, &rdev->flags)) { if (!test_bit(WriteErrorSeen, &rdev->flags) &&
test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error /* treat as in-sync, but with a read error
* which we can now try to correct * which we can now try to correct
*/ */
...@@ -3276,12 +3279,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -3276,12 +3279,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* If there is a failed device being replaced, /* If there is a failed device being replaced,
* we must be recovering. * we must be recovering.
* else if we are after recovery_cp, we must be syncing * else if we are after recovery_cp, we must be syncing
* else if MD_RECOVERY_REQUESTED is set, we also are syncing.
* else we can only be replacing * else we can only be replacing
* sync and recovery both need to read all devices, and so * sync and recovery both need to read all devices, and so
* use the same flag. * use the same flag.
*/ */
if (do_recovery || if (do_recovery ||
sh->sector >= conf->mddev->recovery_cp) sh->sector >= conf->mddev->recovery_cp ||
test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
s->syncing = 1; s->syncing = 1;
else else
s->replacing = 1; s->replacing = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment