Commit ad01c9e3 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds

[PATCH] md: Allow stripes to be expanded in preparation for expanding an array

Before a RAID-5 can be expanded, we need to be able to expand the stripe-cache
data structure.

This requires allocating new stripes in a new kmem_cache.  If this succeeds,
we copy cache pages over and release the old stripes and kmem_cache.

We then allocate new pages.  If that fails, we leave the stripe cache at its
new size.  It isn't worth the effort to shrink it back again.

Unfortunately this means we need two kmem_cache names because, for a short
period of time, we have two kmem_caches.  So they are raid5/%s and
raid5/%s-alt.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b55e6bfc
...@@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev) ...@@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev)
*/ */
static void autorun_devices(int part) static void autorun_devices(int part)
{ {
struct list_head candidates;
struct list_head *tmp; struct list_head *tmp;
mdk_rdev_t *rdev0, *rdev; mdk_rdev_t *rdev0, *rdev;
mddev_t *mddev; mddev_t *mddev;
...@@ -2784,6 +2783,7 @@ static void autorun_devices(int part) ...@@ -2784,6 +2783,7 @@ static void autorun_devices(int part)
printk(KERN_INFO "md: autorun ...\n"); printk(KERN_INFO "md: autorun ...\n");
while (!list_empty(&pending_raid_disks)) { while (!list_empty(&pending_raid_disks)) {
dev_t dev; dev_t dev;
LIST_HEAD(candidates);
rdev0 = list_entry(pending_raid_disks.next, rdev0 = list_entry(pending_raid_disks.next,
mdk_rdev_t, same_set); mdk_rdev_t, same_set);
......
...@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num) ...@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num)
kmem_cache_t *sc; kmem_cache_t *sc;
int devs = conf->raid_disks; int devs = conf->raid_disks;
sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev)); sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
sc = kmem_cache_create(conf->cache_name, conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL); 0, 0, NULL, NULL);
if (!sc) if (!sc)
return 1; return 1;
conf->slab_cache = sc; conf->slab_cache = sc;
conf->pool_size = devs;
while (num--) { while (num--) {
if (!grow_one_stripe(conf)) if (!grow_one_stripe(conf))
return 1; return 1;
} }
return 0; return 0;
} }
/*
 * resize_stripes - grow every stripe_head in the stripe cache so that it
 * has room for 'newsize' devices.
 *
 * Returns 0 on success, or immediately when newsize <= conf->pool_size.
 * Returns -ENOMEM if the new kmem_cache or the full set of new
 * stripe_heads cannot be allocated (the old cache is left untouched), or
 * if the conf->disks reallocation or any new page allocation fails (the
 * stripe cache is nevertheless left at the new size).
 */
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
 * New slots in each stripe get 'page' set to a new page.
 *
 * This happens in stages:
 * 1/ create a new kmem_cache and allocate the required number of
 *    stripe_heads.
 * 2/ gather all the old stripe_heads and transfer the pages across
 *    to the new stripe_heads.  This will have the side effect of
 *    freezing the array as once all stripe_heads have been collected,
 *    no IO will be possible.  Old stripe heads are freed once their
 *    pages have been transferred over, and the old kmem_cache is
 *    freed when all stripes are done.
 * 3/ reallocate conf->disks to be suitably bigger.  If this fails,
 *    we simply return a failure status - no need to clean anything up.
 * 4/ allocate new pages for the new slots in the new stripe_heads.
 *    If this fails, we don't bother trying to shrink the
 *    stripe_heads down again, we just leave them as they are.
 *    As each stripe_head is processed the new one is released into
 *    active service.
 *
 * Once step 2 is started, we cannot afford to wait for a write,
 * so we use GFP_NOIO allocations.
 */
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
int err = 0;
kmem_cache_t *sc;
int i;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
/* Step 1 */
/* The new cache takes whichever of the two cache names is not in use. */
sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
return -ENOMEM;
/* Allocate one new stripe_head for each of conf->max_nr_stripes. */
for (i = conf->max_nr_stripes; i; i--) {
nsh = kmem_cache_alloc(sc, GFP_KERNEL);
if (!nsh)
break;
/* Zero the whole object, including all 'newsize' r5dev slots. */
memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
nsh->raid_conf = conf;
spin_lock_init(&nsh->lock);
list_add(&nsh->lru, &newstripes);
}
/* A non-zero i means the loop above stopped early on a failed allocation. */
if (i) {
/* didn't get enough, give up */
while (!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del(&nsh->lru);
kmem_cache_free(sc, nsh);
}
kmem_cache_destroy(sc);
return -ENOMEM;
}
/* Step 2 - Must use GFP_NOIO now.
 * OK, we have enough stripes, start collecting inactive
 * stripes and copying them over
 */
list_for_each_entry(nsh, &newstripes, lru) {
/* Wait, under device_lock, until an old stripe is on the inactive
 * list, then take it.
 * NOTE(review): get_free_stripe() is assumed to remove the stripe
 * from conf->inactive_list - confirm against its definition.
 */
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list),
conf->device_lock,
unplug_slaves(conf->mddev);
);
osh = get_free_stripe(conf);
spin_unlock_irq(&conf->device_lock);
/* NOTE(review): the count of 1 is presumably dropped by the
 * release_stripe() call in step 4 - confirm.
 */
atomic_set(&nsh->count, 1);
/* Move the existing pages across; the extra slots start out empty. */
for(i=0; i<conf->pool_size; i++)
nsh->dev[i].page = osh->dev[i].page;
for( ; i<newsize; i++)
nsh->dev[i].page = NULL;
kmem_cache_free(conf->slab_cache, osh);
}
/* conf->slab_cache still refers to this destroyed cache until it is
 * reassigned at the end of the function.
 */
kmem_cache_destroy(conf->slab_cache);
/* Step 3.
 * At this point, we are holding all the stripes so the array
 * is completely stalled, so now is a good time to resize
 * conf->disks.
 */
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
if (ndisks) {
/* Only the first conf->raid_disks entries are copied; the rest
 * stay zeroed from kzalloc().
 */
for (i=0; i<conf->raid_disks; i++)
ndisks[i] = conf->disks[i];
kfree(conf->disks);
conf->disks = ndisks;
} else
err = -ENOMEM;
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del_init(&nsh->lru);
/* Slots that already received a page in step 2 are skipped by the
 * NULL check.  A failed allocation is only recorded in err: the
 * stripe is still released with that slot's page left NULL.
 */
for (i=conf->raid_disks; i < newsize; i++)
if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO);
nsh->dev[i].page = p;
if (!p)
err = -ENOMEM;
}
release_stripe(nsh);
}
/* critical section passed, GFP_NOIO no longer needed */
/* Switch conf over to the new cache, its name slot and the new size. */
conf->slab_cache = sc;
conf->active_name = 1-conf->active_name;
conf->pool_size = newsize;
return err;
}
static int drop_one_stripe(raid5_conf_t *conf) static int drop_one_stripe(raid5_conf_t *conf)
{ {
...@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf) ...@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf)
return 0; return 0;
if (atomic_read(&sh->count)) if (atomic_read(&sh->count))
BUG(); BUG();
shrink_buffers(sh, conf->raid_disks); shrink_buffers(sh, conf->pool_size);
kmem_cache_free(conf->slab_cache, sh); kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes); atomic_dec(&conf->active_stripes);
return 1; return 1;
......
...@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *conf, int num) ...@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *conf, int num)
kmem_cache_t *sc; kmem_cache_t *sc;
int devs = conf->raid_disks; int devs = conf->raid_disks;
sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev)); sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
sc = kmem_cache_create(conf->cache_name, sc = kmem_cache_create(conf->cache_name[0],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL); 0, 0, NULL, NULL);
if (!sc) if (!sc)
......
...@@ -216,7 +216,11 @@ struct raid5_private_data { ...@@ -216,7 +216,11 @@ struct raid5_private_data {
struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
atomic_t preread_active_stripes; /* stripes with scheduled io */ atomic_t preread_active_stripes; /* stripes with scheduled io */
char cache_name[20]; /* unfortunately we need two cache names as we temporarily have
* two caches.
*/
int active_name;
char cache_name[2][20];
kmem_cache_t *slab_cache; /* for allocating stripes */ kmem_cache_t *slab_cache; /* for allocating stripes */
int seq_flush, seq_write; int seq_flush, seq_write;
...@@ -239,6 +243,7 @@ struct raid5_private_data { ...@@ -239,6 +243,7 @@ struct raid5_private_data {
int inactive_blocked; /* release of inactive stripes blocked, int inactive_blocked; /* release of inactive stripes blocked,
* waiting for 25% to be free * waiting for 25% to be free
*/ */
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock; spinlock_t device_lock;
struct disk_info *disks; struct disk_info *disks;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment