Commit f6a0ebca authored by Neil Brown, committed by Linus Torvalds

[PATCH] md: support reshaping raid1 arrays - adding or removing drives.

This allows the number of "raid_disks" in a raid1 to be changed.

This requires allocating a new pool of "r1bio" structures with a different
number of bios, suspending IO, and swapping the new pool in place of the old
(along with a few other related changes).
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent f23b262d
...@@ -2388,18 +2388,23 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -2388,18 +2388,23 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
{ {
int rv = 0; int rv = 0;
int cnt = 0;
if (mddev->major_version != info->major_version || if (mddev->major_version != info->major_version ||
mddev->minor_version != info->minor_version || mddev->minor_version != info->minor_version ||
/* mddev->patch_version != info->patch_version || */ /* mddev->patch_version != info->patch_version || */
mddev->ctime != info->ctime || mddev->ctime != info->ctime ||
mddev->level != info->level || mddev->level != info->level ||
mddev->raid_disks != info->raid_disks ||
mddev->layout != info->layout || mddev->layout != info->layout ||
!mddev->persistent != info->not_persistent|| !mddev->persistent != info->not_persistent||
mddev->chunk_size != info->chunk_size ) mddev->chunk_size != info->chunk_size )
return -EINVAL; return -EINVAL;
/* that leaves only size */ /* Check there is only one change */
if (mddev->size != info->size) cnt++;
if (mddev->raid_disks != info->raid_disks) cnt++;
if (cnt == 0) return 0;
if (cnt > 1) return -EINVAL;
if (mddev->size != info->size) { if (mddev->size != info->size) {
mdk_rdev_t * rdev; mdk_rdev_t * rdev;
struct list_head *tmp; struct list_head *tmp;
...@@ -2443,6 +2448,28 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) ...@@ -2443,6 +2448,28 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
} }
} }
} }
if (mddev->raid_disks != info->raid_disks) {
/* change the number of raid disks */
if (mddev->pers->reshape == NULL)
return -EINVAL;
if (info->raid_disks <= 0 ||
info->raid_disks >= mddev->max_disks)
return -EINVAL;
if (mddev->sync_thread)
return -EBUSY;
rv = mddev->pers->reshape(mddev, info->raid_disks);
if (!rv) {
struct block_device *bdev;
bdev = bdget_disk(mddev->gendisk, 0);
if (bdev) {
down(&bdev->bd_inode->i_sem);
i_size_write(bdev->bd_inode, mddev->array_size << 10);
up(&bdev->bd_inode->i_sem);
bdput(bdev);
}
}
}
md_update_sb(mddev); md_update_sb(mddev);
return rv; return rv;
} }
......
...@@ -42,16 +42,17 @@ static void unplug_slaves(mddev_t *mddev); ...@@ -42,16 +42,17 @@ static void unplug_slaves(mddev_t *mddev);
static void * r1bio_pool_alloc(int gfp_flags, void *data) static void * r1bio_pool_alloc(int gfp_flags, void *data)
{ {
mddev_t *mddev = data; struct pool_info *pi = data;
r1bio_t *r1_bio; r1bio_t *r1_bio;
/* allocate a r1bio with room for raid_disks entries in the bios array */ /* allocate a r1bio with room for raid_disks entries in the bios array */
r1_bio = kmalloc(sizeof(r1bio_t) + sizeof(struct bio*)*mddev->raid_disks, r1_bio = kmalloc(sizeof(r1bio_t) + sizeof(struct bio*)*pi->raid_disks,
gfp_flags); gfp_flags);
if (r1_bio) if (r1_bio)
memset(r1_bio, 0, sizeof(*r1_bio) + sizeof(struct bio*)*mddev->raid_disks); memset(r1_bio, 0, sizeof(*r1_bio) +
sizeof(struct bio*) * pi->raid_disks);
else else
unplug_slaves(mddev); unplug_slaves(pi->mddev);
return r1_bio; return r1_bio;
} }
...@@ -69,22 +70,22 @@ static void r1bio_pool_free(void *r1_bio, void *data) ...@@ -69,22 +70,22 @@ static void r1bio_pool_free(void *r1_bio, void *data)
static void * r1buf_pool_alloc(int gfp_flags, void *data) static void * r1buf_pool_alloc(int gfp_flags, void *data)
{ {
conf_t *conf = data; struct pool_info *pi = data;
struct page *page; struct page *page;
r1bio_t *r1_bio; r1bio_t *r1_bio;
struct bio *bio; struct bio *bio;
int i, j; int i, j;
r1_bio = r1bio_pool_alloc(gfp_flags, conf->mddev); r1_bio = r1bio_pool_alloc(gfp_flags, pi);
if (!r1_bio) { if (!r1_bio) {
unplug_slaves(conf->mddev); unplug_slaves(pi->mddev);
return NULL; return NULL;
} }
/* /*
* Allocate bios : 1 for reading, n-1 for writing * Allocate bios : 1 for reading, n-1 for writing
*/ */
for (j = conf->raid_disks ; j-- ; ) { for (j = pi->raid_disks ; j-- ; ) {
bio = bio_alloc(gfp_flags, RESYNC_PAGES); bio = bio_alloc(gfp_flags, RESYNC_PAGES);
if (!bio) if (!bio)
goto out_free_bio; goto out_free_bio;
...@@ -111,16 +112,16 @@ static void * r1buf_pool_alloc(int gfp_flags, void *data) ...@@ -111,16 +112,16 @@ static void * r1buf_pool_alloc(int gfp_flags, void *data)
for ( ; i > 0 ; i--) for ( ; i > 0 ; i--)
__free_page(bio->bi_io_vec[i-1].bv_page); __free_page(bio->bi_io_vec[i-1].bv_page);
out_free_bio: out_free_bio:
while ( ++j < conf->raid_disks ) while ( ++j < pi->raid_disks )
bio_put(r1_bio->bios[j]); bio_put(r1_bio->bios[j]);
r1bio_pool_free(r1_bio, conf->mddev); r1bio_pool_free(r1_bio, data);
return NULL; return NULL;
} }
static void r1buf_pool_free(void *__r1_bio, void *data) static void r1buf_pool_free(void *__r1_bio, void *data)
{ {
struct pool_info *pi = data;
int i; int i;
conf_t *conf = data;
r1bio_t *r1bio = __r1_bio; r1bio_t *r1bio = __r1_bio;
struct bio *bio = r1bio->bios[0]; struct bio *bio = r1bio->bios[0];
...@@ -128,10 +129,10 @@ static void r1buf_pool_free(void *__r1_bio, void *data) ...@@ -128,10 +129,10 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
__free_page(bio->bi_io_vec[i].bv_page); __free_page(bio->bi_io_vec[i].bv_page);
bio->bi_io_vec[i].bv_page = NULL; bio->bi_io_vec[i].bv_page = NULL;
} }
for (i=0 ; i < conf->raid_disks; i++) for (i=0 ; i < pi->raid_disks; i++)
bio_put(r1bio->bios[i]); bio_put(r1bio->bios[i]);
r1bio_pool_free(r1bio, conf->mddev); r1bio_pool_free(r1bio, data);
} }
static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
...@@ -510,7 +511,7 @@ static int make_request(request_queue_t *q, struct bio * bio) ...@@ -510,7 +511,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
mirror_info_t *mirror; mirror_info_t *mirror;
r1bio_t *r1_bio; r1bio_t *r1_bio;
struct bio *read_bio; struct bio *read_bio;
int i, disks = conf->raid_disks; int i, disks;
/* /*
* Register the new request and wait if the reconstruction * Register the new request and wait if the reconstruction
...@@ -570,6 +571,7 @@ static int make_request(request_queue_t *q, struct bio * bio) ...@@ -570,6 +571,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
* inc refcount on their rdev. Record them by setting * inc refcount on their rdev. Record them by setting
* bios[x] to bio * bios[x] to bio
*/ */
disks = conf->raid_disks;
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
if (conf->mirrors[i].rdev && if (conf->mirrors[i].rdev &&
...@@ -953,7 +955,8 @@ static int init_resync(conf_t *conf) ...@@ -953,7 +955,8 @@ static int init_resync(conf_t *conf)
buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
if (conf->r1buf_pool) if (conf->r1buf_pool)
BUG(); BUG();
conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, conf); conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
conf->poolinfo);
if (!conf->r1buf_pool) if (!conf->r1buf_pool)
return -ENOMEM; return -ENOMEM;
conf->next_resync = 0; conf->next_resync = 0;
...@@ -1136,28 +1139,28 @@ static int run(mddev_t *mddev) ...@@ -1136,28 +1139,28 @@ static int run(mddev_t *mddev)
*/ */
conf = kmalloc(sizeof(conf_t), GFP_KERNEL); conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
mddev->private = conf; mddev->private = conf;
if (!conf) { if (!conf)
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", goto out_no_mem;
mdname(mddev));
goto out;
}
memset(conf, 0, sizeof(*conf)); memset(conf, 0, sizeof(*conf));
conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks, conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
GFP_KERNEL); GFP_KERNEL);
if (!conf->mirrors) { if (!conf->mirrors)
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", goto out_no_mem;
mdname(mddev));
goto out_free_conf;
}
memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
if (!conf->poolinfo)
goto out_no_mem;
conf->poolinfo->mddev = mddev;
conf->poolinfo->raid_disks = mddev->raid_disks;
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free, mddev); r1bio_pool_free,
if (!conf->r1bio_pool) { conf->poolinfo);
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n", if (!conf->r1bio_pool)
mdname(mddev)); goto out_no_mem;
goto out_free_conf;
}
mddev->queue->unplug_fn = raid1_unplug; mddev->queue->unplug_fn = raid1_unplug;
...@@ -1243,13 +1246,21 @@ static int run(mddev_t *mddev) ...@@ -1243,13 +1246,21 @@ static int run(mddev_t *mddev)
return 0; return 0;
out_no_mem:
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
mdname(mddev));
out_free_conf: out_free_conf:
if (conf->r1bio_pool) if (conf) {
mempool_destroy(conf->r1bio_pool); if (conf->r1bio_pool)
if (conf->mirrors) mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors); if (conf->mirrors)
kfree(conf); kfree(conf->mirrors);
mddev->private = NULL; if (conf->poolinfo)
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
}
out: out:
return -EIO; return -EIO;
} }
...@@ -1264,6 +1275,8 @@ static int stop(mddev_t *mddev) ...@@ -1264,6 +1275,8 @@ static int stop(mddev_t *mddev)
mempool_destroy(conf->r1bio_pool); mempool_destroy(conf->r1bio_pool);
if (conf->mirrors) if (conf->mirrors)
kfree(conf->mirrors); kfree(conf->mirrors);
if (conf->poolinfo)
kfree(conf->poolinfo);
kfree(conf); kfree(conf);
mddev->private = NULL; mddev->private = NULL;
return 0; return 0;
...@@ -1289,6 +1302,81 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) ...@@ -1289,6 +1302,81 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
return 0; return 0;
} }
/*
 * raid1_reshape - change the number of device slots in a raid1 array.
 * @mddev:      the array being reshaped
 * @raid_disks: the new number of raid disks
 *
 * We need to:
 *   1/ resize the r1bio_pool
 *   2/ resize conf->mirrors
 *
 * We allocate a new r1bio_pool (and its pool_info) if we can.
 * Then raise a device barrier and wait until all IO stops.
 * Then resize conf->mirrors and swap in the new r1bio pool.
 *
 * Returns 0 on success, -EBUSY if a slot that would be removed still has
 * a device attached, or -ENOMEM if any of the new structures cannot be
 * allocated (in which case the array is left unchanged).
 */
static int raid1_reshape(mddev_t *mddev, int raid_disks)
{
	mempool_t *newpool, *oldpool;
	struct pool_info *newpoolinfo, *oldpoolinfo;
	mirror_info_t *newmirrors;
	conf_t *conf = mddev_to_conf(mddev);
	int d;

	/* When shrinking, every slot that disappears must already be empty. */
	for (d = raid_disks; d < conf->raid_disks; d++)
		if (conf->mirrors[d].rdev)
			return -EBUSY;

	/*
	 * Allocate the whole structure, not just a pointer's worth:
	 * sizeof(newpoolinfo) would be the size of the pointer itself.
	 */
	newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
	if (!newpoolinfo)
		return -ENOMEM;
	newpoolinfo->mddev = mddev;
	newpoolinfo->raid_disks = raid_disks;

	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
				 r1bio_pool_free, newpoolinfo);
	if (!newpool) {
		kfree(newpoolinfo);
		return -ENOMEM;
	}

	newmirrors = kmalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
	if (!newmirrors) {
		/*
		 * Tear the pool down before its pool_info: the pool was
		 * created with newpoolinfo as its callback data.
		 */
		mempool_destroy(newpool);
		kfree(newpoolinfo);
		return -ENOMEM;
	}
	memset(newmirrors, 0, sizeof(struct mirror_info) * raid_disks);

	/* Raise the barrier and wait for all pending IO to drain. */
	spin_lock_irq(&conf->resync_lock);
	conf->barrier++;
	wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
			    conf->resync_lock, unplug_slaves(mddev));
	spin_unlock_irq(&conf->resync_lock);

	/* ok, everything is stopped */
	oldpool = conf->r1bio_pool;
	conf->r1bio_pool = newpool;

	/* Carry over the slots that exist in both the old and new layout. */
	for (d = 0; d < raid_disks && d < conf->raid_disks; d++)
		newmirrors[d] = conf->mirrors[d];
	kfree(conf->mirrors);
	conf->mirrors = newmirrors;

	/* Keep the old pool_info alive until the old pool has been destroyed. */
	oldpoolinfo = conf->poolinfo;
	conf->poolinfo = newpoolinfo;

	/* Added slots start out empty (degraded grows); removed ones were empty. */
	mddev->degraded += (raid_disks - conf->raid_disks);
	conf->raid_disks = mddev->raid_disks = raid_disks;

	/* Drop the barrier and let waiters continue. */
	spin_lock_irq(&conf->resync_lock);
	conf->barrier--;
	spin_unlock_irq(&conf->resync_lock);
	wake_up(&conf->wait_resume);
	wake_up(&conf->wait_idle);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);

	mempool_destroy(oldpool);
	kfree(oldpoolinfo);
	return 0;
}
static mdk_personality_t raid1_personality = static mdk_personality_t raid1_personality =
{ {
.name = "raid1", .name = "raid1",
...@@ -1303,6 +1391,7 @@ static mdk_personality_t raid1_personality = ...@@ -1303,6 +1391,7 @@ static mdk_personality_t raid1_personality =
.spare_active = raid1_spare_active, .spare_active = raid1_spare_active,
.sync_request = sync_request, .sync_request = sync_request,
.resize = raid1_resize, .resize = raid1_resize,
.reshape = raid1_reshape,
}; };
static int __init raid_init(void) static int __init raid_init(void)
......
...@@ -280,6 +280,7 @@ struct mdk_personality_s ...@@ -280,6 +280,7 @@ struct mdk_personality_s
int (*spare_active) (mddev_t *mddev); int (*spare_active) (mddev_t *mddev);
int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster); int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
int (*resize) (mddev_t *mddev, sector_t sectors); int (*resize) (mddev_t *mddev, sector_t sectors);
int (*reshape) (mddev_t *mddev, int raid_disks);
}; };
......
...@@ -10,6 +10,20 @@ struct mirror_info { ...@@ -10,6 +10,20 @@ struct mirror_info {
sector_t head_position; sector_t head_position;
}; };
/*
 * Private data handed to the raid1 memory-pool callbacks.
 *
 * The pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used for that count, as it can change
 * while a pool is active.
 * These two data items are stored together in a kmalloc'ed struct.
 */
struct pool_info {
mddev_t *mddev;		/* array the pool belongs to; used to unplug its slaves */
int raid_disks;		/* number of bio slots each r1bio in this pool is sized for */
};
typedef struct r1bio_s r1bio_t; typedef struct r1bio_s r1bio_t;
struct r1_private_data_s { struct r1_private_data_s {
...@@ -31,6 +45,8 @@ struct r1_private_data_s { ...@@ -31,6 +45,8 @@ struct r1_private_data_s {
wait_queue_head_t wait_idle; wait_queue_head_t wait_idle;
wait_queue_head_t wait_resume; wait_queue_head_t wait_resume;
struct pool_info *poolinfo;
mempool_t *r1bio_pool; mempool_t *r1bio_pool;
mempool_t *r1buf_pool; mempool_t *r1buf_pool;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment