Commit c93983bf authored by NeilBrown, committed by Linus Torvalds

[PATCH] md: support stripe/offset mode in raid10

The "industry standard" DDF format allows for a stripe/offset layout where
data is duplicated on different stripes.  e.g.

  A  B  C  D
  D  A  B  C
  E  F  G  H
  H  E  F  G

(columns are drives, rows are stripes, LETTERS are chunks of data).

This is similar to raid10's 'far' mode, but not quite the same.  So enhance
'far' mode with a 'far/offset' option which follows the layout of DDF's
stripe/offset.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 7c7546cc
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
* raid_disks * raid_disks
* near_copies (stored in low byte of layout) * near_copies (stored in low byte of layout)
* far_copies (stored in second byte of layout) * far_copies (stored in second byte of layout)
* far_offset (stored in bit 16 of layout)
* *
* The data to be stored is divided into chunks using chunksize. * The data to be stored is divided into chunks using chunksize.
* Each device is divided into far_copies sections. * Each device is divided into far_copies sections.
...@@ -36,10 +37,14 @@ ...@@ -36,10 +37,14 @@
* near_copies copies of each chunk is stored (each on a different drive). * near_copies copies of each chunk is stored (each on a different drive).
* The starting device for each section is offset near_copies from the starting * The starting device for each section is offset near_copies from the starting
* device of the previous section. * device of the previous section.
* Thus there are (near_copies*far_copies) of each chunk, and each is on a different * Thus there are (near_copies*far_copies) of each chunk, and each is on a different
* drive. * drive.
* near_copies and far_copies must be at least one, and their product is at most * near_copies and far_copies must be at least one, and their product is at most
* raid_disks. * raid_disks.
*
* If far_offset is true, then the far_copies are handled a bit differently.
* The copies are still in different stripes, but instead of being very far apart
* on disk, they are in adjacent stripes.
*/ */
/* /*
...@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in ...@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
* With this layout, a block is never stored twice on the one device. * With this layout, a block is never stored twice on the one device.
* *
* raid10_find_phys finds the sector offset of a given virtual sector * raid10_find_phys finds the sector offset of a given virtual sector
* on each device that it is on. If a block isn't on a device, * on each device that it is on.
* that entry in the array is set to MaxSector.
* *
* raid10_find_virt does the reverse mapping, from a device and a * raid10_find_virt does the reverse mapping, from a device and a
* sector offset to a virtual address * sector offset to a virtual address
...@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio) ...@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio)
chunk *= conf->near_copies; chunk *= conf->near_copies;
stripe = chunk; stripe = chunk;
dev = sector_div(stripe, conf->raid_disks); dev = sector_div(stripe, conf->raid_disks);
if (conf->far_offset)
stripe *= conf->far_copies;
sector += stripe << conf->chunk_shift; sector += stripe << conf->chunk_shift;
...@@ -414,6 +420,15 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) ...@@ -414,6 +420,15 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
{ {
sector_t offset, chunk, vchunk; sector_t offset, chunk, vchunk;
offset = sector & conf->chunk_mask;
if (conf->far_offset) {
int fc;
chunk = sector >> conf->chunk_shift;
fc = sector_div(chunk, conf->far_copies);
dev -= fc * conf->near_copies;
if (dev < 0)
dev += conf->raid_disks;
} else {
while (sector > conf->stride) { while (sector > conf->stride) {
sector -= conf->stride; sector -= conf->stride;
if (dev < conf->near_copies) if (dev < conf->near_copies)
...@@ -421,9 +436,8 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) ...@@ -421,9 +436,8 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
else else
dev -= conf->near_copies; dev -= conf->near_copies;
} }
offset = sector & conf->chunk_mask;
chunk = sector >> conf->chunk_shift; chunk = sector >> conf->chunk_shift;
}
vchunk = chunk * conf->raid_disks + dev; vchunk = chunk * conf->raid_disks + dev;
sector_div(vchunk, conf->near_copies); sector_div(vchunk, conf->near_copies);
return (vchunk << conf->chunk_shift) + offset; return (vchunk << conf->chunk_shift) + offset;
...@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev) ...@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev)
seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
if (conf->near_copies > 1) if (conf->near_copies > 1)
seq_printf(seq, " %d near-copies", conf->near_copies); seq_printf(seq, " %d near-copies", conf->near_copies);
if (conf->far_copies > 1) if (conf->far_copies > 1) {
if (conf->far_offset)
seq_printf(seq, " %d offset-copies", conf->far_copies);
else
seq_printf(seq, " %d far-copies", conf->far_copies); seq_printf(seq, " %d far-copies", conf->far_copies);
}
seq_printf(seq, " [%d/%d] [", conf->raid_disks, seq_printf(seq, " [%d/%d] [", conf->raid_disks,
conf->working_disks); conf->working_disks);
for (i = 0; i < conf->raid_disks; i++) for (i = 0; i < conf->raid_disks; i++)
...@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev) ...@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev)
mirror_info_t *disk; mirror_info_t *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp; struct list_head *tmp;
int nc, fc; int nc, fc, fo;
sector_t stride, size; sector_t stride, size;
if (mddev->chunk_size == 0) { if (mddev->chunk_size == 0) {
...@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev) ...@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev)
nc = mddev->layout & 255; nc = mddev->layout & 255;
fc = (mddev->layout >> 8) & 255; fc = (mddev->layout >> 8) & 255;
fo = mddev->layout & (1<<16);
if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
(mddev->layout >> 16)) { (mddev->layout >> 17)) {
printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
mdname(mddev), mddev->layout); mdname(mddev), mddev->layout);
goto out; goto out;
...@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev) ...@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev)
conf->near_copies = nc; conf->near_copies = nc;
conf->far_copies = fc; conf->far_copies = fc;
conf->copies = nc*fc; conf->copies = nc*fc;
conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9; conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
if (fo)
conf->stride = 1 << conf->chunk_shift;
else {
stride = mddev->size >> (conf->chunk_shift-1); stride = mddev->size >> (conf->chunk_shift-1);
sector_div(stride, fc); sector_div(stride, fc);
conf->stride = stride << conf->chunk_shift; conf->stride = stride << conf->chunk_shift;
}
conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
r10bio_pool_free, conf); r10bio_pool_free, conf);
if (!conf->r10bio_pool) { if (!conf->r10bio_pool) {
......
...@@ -24,11 +24,16 @@ struct r10_private_data_s { ...@@ -24,11 +24,16 @@ struct r10_private_data_s {
int far_copies; /* number of copies layed out int far_copies; /* number of copies layed out
* at large strides across drives * at large strides across drives
*/ */
int far_offset; /* far_copies are offset by 1 stripe
* instead of many
*/
int copies; /* near_copies * far_copies. int copies; /* near_copies * far_copies.
* must be <= raid_disks * must be <= raid_disks
*/ */
sector_t stride; /* distance between far copies. sector_t stride; /* distance between far copies.
* This is size / far_copies * This is size / far_copies unless
* far_offset, in which case it is
* 1 stripe.
*/ */
int chunk_shift; /* shift from chunks to sectors */ int chunk_shift; /* shift from chunks to sectors */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment