Commit 67cc2b81 authored by NeilBrown

md/raid5: finish support for DDF/raid6

DDF requires RAID6 calculations over different devices in a different
order.
For md/raid6, we calculate over just the data devices, starting
immediately after the 'Q' block.
For ddf/raid6 we calculate over all devices, using zeros in place of
the P and Q blocks.

This requires unfortunately complex loops...
Signed-off-by: NeilBrown <neilb@suse.de>
parent 99c0fb5f
...@@ -136,6 +136,10 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) ...@@ -136,6 +136,10 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
/* Find first data disk in a raid6 stripe */ /* Find first data disk in a raid6 stripe */
static inline int raid6_d0(struct stripe_head *sh) static inline int raid6_d0(struct stripe_head *sh)
{ {
if (sh->ddf_layout)
/* ddf always start from first device */
return 0;
/* md starts just after Q block */
if (sh->qd_idx == sh->disks - 1) if (sh->qd_idx == sh->disks - 1)
return 0; return 0;
else else
...@@ -152,13 +156,15 @@ static inline int raid6_next_disk(int disk, int raid_disks) ...@@ -152,13 +156,15 @@ static inline int raid6_next_disk(int disk, int raid_disks)
* 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
* is raid_disks-1. This help does that mapping. * is raid_disks-1. This help does that mapping.
*/ */
static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count) static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
int *count, int syndrome_disks)
{ {
int slot; int slot;
if (idx == sh->pd_idx) if (idx == sh->pd_idx)
return sh->disks - 2; return syndrome_disks;
if (idx == sh->qd_idx) if (idx == sh->qd_idx)
return sh->disks - 1; return syndrome_disks + 1;
slot = (*count)++; slot = (*count)++;
return slot; return slot;
} }
...@@ -1267,6 +1273,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, ...@@ -1267,6 +1273,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
unsigned long chunk_number; unsigned long chunk_number;
unsigned int chunk_offset; unsigned int chunk_offset;
int pd_idx, qd_idx; int pd_idx, qd_idx;
int ddf_layout = 0;
sector_t new_sector; sector_t new_sector;
int sectors_per_chunk = conf->chunk_size >> 9; int sectors_per_chunk = conf->chunk_size >> 9;
int raid_disks = previous ? conf->previous_raid_disks int raid_disks = previous ? conf->previous_raid_disks
...@@ -1386,6 +1393,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, ...@@ -1386,6 +1393,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
qd_idx = 0; qd_idx = 0;
} else if (*dd_idx >= pd_idx) } else if (*dd_idx >= pd_idx)
(*dd_idx) += 2; /* D D P Q D */ (*dd_idx) += 2; /* D D P Q D */
ddf_layout = 1;
break; break;
case ALGORITHM_ROTATING_N_RESTART: case ALGORITHM_ROTATING_N_RESTART:
...@@ -1400,6 +1408,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, ...@@ -1400,6 +1408,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
qd_idx = 0; qd_idx = 0;
} else if (*dd_idx >= pd_idx) } else if (*dd_idx >= pd_idx)
(*dd_idx) += 2; /* D D P Q D */ (*dd_idx) += 2; /* D D P Q D */
ddf_layout = 1;
break; break;
case ALGORITHM_ROTATING_N_CONTINUE: case ALGORITHM_ROTATING_N_CONTINUE:
...@@ -1407,6 +1416,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, ...@@ -1407,6 +1416,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
pd_idx = raid_disks - 1 - (stripe % raid_disks); pd_idx = raid_disks - 1 - (stripe % raid_disks);
qd_idx = (pd_idx + raid_disks - 1) % raid_disks; qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
ddf_layout = 1;
break; break;
case ALGORITHM_LEFT_ASYMMETRIC_6: case ALGORITHM_LEFT_ASYMMETRIC_6:
...@@ -1454,6 +1464,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, ...@@ -1454,6 +1464,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
if (sh) { if (sh) {
sh->pd_idx = pd_idx; sh->pd_idx = pd_idx;
sh->qd_idx = qd_idx; sh->qd_idx = qd_idx;
sh->ddf_layout = ddf_layout;
} }
/* /*
* Finally, compute the new sector number * Finally, compute the new sector number
...@@ -1642,9 +1653,10 @@ static void compute_parity6(struct stripe_head *sh, int method) ...@@ -1642,9 +1653,10 @@ static void compute_parity6(struct stripe_head *sh, int method)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
struct bio *chosen; struct bio *chosen;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/ /**** FIX THIS: This could be very bad if disks is close to 256 ****/
void *ptrs[disks]; void *ptrs[syndrome_disks+2];
pd_idx = sh->pd_idx; pd_idx = sh->pd_idx;
qd_idx = sh->qd_idx; qd_idx = sh->qd_idx;
...@@ -1687,23 +1699,28 @@ static void compute_parity6(struct stripe_head *sh, int method) ...@@ -1687,23 +1699,28 @@ static void compute_parity6(struct stripe_head *sh, int method)
} }
/* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
/* FIX: Is this ordering of drives even remotely optimal? */
for (i = 0; i < disks; i++)
ptrs[i] = (void *)raid6_empty_zero_page;
count = 0; count = 0;
i = d0_idx; i = d0_idx;
do { do {
int slot = raid6_idx_to_slot(i, sh, &count); int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
ptrs[slot] = page_address(sh->dev[i].page); ptrs[slot] = page_address(sh->dev[i].page);
if (slot < sh->disks - 2 && if (slot < syndrome_disks &&
!test_bit(R5_UPTODATE, &sh->dev[i].flags)) { !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
printk(KERN_ERR "block %d/%d not uptodate " printk(KERN_ERR "block %d/%d not uptodate "
"on parity calc\n", i, count); "on parity calc\n", i, count);
BUG(); BUG();
} }
i = raid6_next_disk(i, disks); i = raid6_next_disk(i, disks);
} while (i != d0_idx); } while (i != d0_idx);
BUG_ON(count+2 != disks); BUG_ON(count != syndrome_disks);
raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
switch(method) { switch(method) {
case RECONSTRUCT_WRITE: case RECONSTRUCT_WRITE:
...@@ -1761,24 +1778,28 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) ...@@ -1761,24 +1778,28 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
{ {
int i, count, disks = sh->disks; int i, count, disks = sh->disks;
int syndrome_disks = sh->ddf_layout ? disks : disks-2;
int d0_idx = raid6_d0(sh); int d0_idx = raid6_d0(sh);
int faila = -1, failb = -1; int faila = -1, failb = -1;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/ /**** FIX THIS: This could be very bad if disks is close to 256 ****/
void *ptrs[disks]; void *ptrs[syndrome_disks+2];
for (i = 0; i < disks ; i++)
ptrs[i] = (void *)raid6_empty_zero_page;
count = 0; count = 0;
i = d0_idx; i = d0_idx;
do { do {
int slot; int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
slot = raid6_idx_to_slot(i, sh, &count);
ptrs[slot] = page_address(sh->dev[i].page); ptrs[slot] = page_address(sh->dev[i].page);
if (i == dd_idx1) if (i == dd_idx1)
faila = slot; faila = slot;
if (i == dd_idx2) if (i == dd_idx2)
failb = slot; failb = slot;
i = raid6_next_disk(i, disks); i = raid6_next_disk(i, disks);
} while (i != d0_idx); } while (i != d0_idx);
BUG_ON(count+2 != disks); BUG_ON(count != syndrome_disks);
BUG_ON(faila == failb); BUG_ON(faila == failb);
if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
...@@ -1787,9 +1808,9 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) ...@@ -1787,9 +1808,9 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
(unsigned long long)sh->sector, dd_idx1, dd_idx2, (unsigned long long)sh->sector, dd_idx1, dd_idx2,
faila, failb); faila, failb);
if ( failb == disks-1 ) { if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */ /* Q disk is one of the missing disks */
if ( faila == disks-2 ) { if (faila == syndrome_disks) {
/* Missing P+Q, just recompute */ /* Missing P+Q, just recompute */
compute_parity6(sh, UPDATE_PARITY); compute_parity6(sh, UPDATE_PARITY);
return; return;
...@@ -1804,12 +1825,13 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) ...@@ -1804,12 +1825,13 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
} }
/* We're missing D+P or D+D; */ /* We're missing D+P or D+D; */
if (failb == disks-2) { if (failb == syndrome_disks) {
/* We're missing D+P. */ /* We're missing D+P. */
raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
} else { } else {
/* We're missing D+D. */ /* We're missing D+D. */
raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
ptrs);
} }
/* Both the above update both missing blocks */ /* Both the above update both missing blocks */
......
...@@ -201,6 +201,7 @@ struct stripe_head { ...@@ -201,6 +201,7 @@ struct stripe_head {
sector_t sector; /* sector of this row */ sector_t sector; /* sector of this row */
short pd_idx; /* parity disk index */ short pd_idx; /* parity disk index */
short qd_idx; /* 'Q' disk index for raid6 */ short qd_idx; /* 'Q' disk index for raid6 */
short ddf_layout;/* use DDF ordering to calculate Q */
unsigned long state; /* state flags */ unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */ atomic_t count; /* nr of active thread/requests */
spinlock_t lock; spinlock_t lock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment