Commit ee7fee0b authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: remove rd%d links immediately after stopping an array.
  md: remove ability to explicitly set an inactive array to 'clean'.
  md: constify VFTs
  md: tidy up status_resync to handle large arrays.
  md: fix some (more) errors with bitmaps on devices larger than 2TB.
  md/raid10: don't clear bitmap during recovery if array will still be degraded.
  md: fix loading of out-of-date bitmap.
parents 8a0a9bd4 c4647292
...@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ...@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
oldindex = index; oldindex = index;
oldpage = page; oldpage = page;
bitmap->filemap[bitmap->file_pages++] = page;
bitmap->last_page_size = count;
if (outofdate) { if (outofdate) {
/* /*
* if bitmap is out of date, dirty the * if bitmap is out of date, dirty the
...@@ -998,16 +1001,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ...@@ -998,16 +1001,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
write_page(bitmap, page, 1); write_page(bitmap, page, 1);
ret = -EIO; ret = -EIO;
if (bitmap->flags & BITMAP_WRITE_ERROR) { if (bitmap->flags & BITMAP_WRITE_ERROR)
/* release, page not in filemap yet */
put_page(page);
goto err; goto err;
} }
} }
bitmap->filemap[bitmap->file_pages++] = page;
bitmap->last_page_size = count;
}
paddr = kmap_atomic(page, KM_USER0); paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN) if (bitmap->flags & BITMAP_HOSTENDIAN)
b = test_bit(bit, paddr); b = test_bit(bit, paddr);
...@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) ...@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
kunmap_atomic(paddr, KM_USER0); kunmap_atomic(paddr, KM_USER0);
if (b) { if (b) {
/* if the disk bit is set, set the memory bit */ /* if the disk bit is set, set the memory bit */
bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) >= start);
); bitmap_set_memory_bits(bitmap,
(sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
needed);
bit_cnt++; bit_cnt++;
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
} }
...@@ -1154,7 +1153,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) ...@@ -1154,7 +1153,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
spin_lock_irqsave(&bitmap->lock, flags); spin_lock_irqsave(&bitmap->lock, flags);
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
} }
bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), bmc = bitmap_get_counter(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
&blocks, 0); &blocks, 0);
if (bmc) { if (bmc) {
/* /*
...@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) ...@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
} else if (*bmc == 1) { } else if (*bmc == 1) {
/* we can clear the bit */ /* we can clear the bit */
*bmc = 0; *bmc = 0;
bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), bitmap_count_page(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
-1); -1);
/* clear the bit */ /* clear the bit */
...@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) ...@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
unsigned long chunk; unsigned long chunk;
for (chunk = s; chunk <= e; chunk++) { for (chunk = s; chunk <= e; chunk++) {
sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
bitmap_set_memory_bits(bitmap, sec, 1); bitmap_set_memory_bits(bitmap, sec, 1);
bitmap_file_set_bit(bitmap, sec); bitmap_file_set_bit(bitmap, sec);
} }
......
...@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) ...@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
} else } else
err = -EBUSY; err = -EBUSY;
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
} else { } else
mddev->ro = 0; err = -EINVAL;
mddev->recovery_cp = MaxSector;
err = do_md_run(mddev);
}
break; break;
case active: case active:
if (mddev->pers) { if (mddev->pers) {
...@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{ {
int err = 0; int err = 0;
struct gendisk *disk = mddev->gendisk; struct gendisk *disk = mddev->gendisk;
mdk_rdev_t *rdev;
if (atomic_read(&mddev->openers) > is_open) { if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev)); printk("md: %s still in use.\n",mdname(mddev));
...@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
/* tell userspace to handle 'inactive' */ /* tell userspace to handle 'inactive' */
sysfs_notify_dirent(mddev->sysfs_state); sysfs_notify_dirent(mddev->sysfs_state);
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
set_capacity(disk, 0); set_capacity(disk, 0);
mddev->changed = 1; mddev->changed = 1;
...@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
* Free resources if final stop * Free resources if final stop
*/ */
if (mode == 0) { if (mode == 0) {
mdk_rdev_t *rdev;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
...@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
} }
mddev->bitmap_offset = 0; mddev->bitmap_offset = 0;
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
/* make sure all md_delayed_delete calls have finished */ /* make sure all md_delayed_delete calls have finished */
flush_scheduled_work(); flush_scheduled_work();
...@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq) ...@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq)
static void status_resync(struct seq_file *seq, mddev_t * mddev) static void status_resync(struct seq_file *seq, mddev_t * mddev)
{ {
sector_t max_blocks, resync, res; sector_t max_sectors, resync, res;
unsigned long dt, db, rt; unsigned long dt, db;
sector_t rt;
int scale; int scale;
unsigned int per_milli; unsigned int per_milli;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_blocks = mddev->resync_max_sectors >> 1; max_sectors = mddev->resync_max_sectors;
else else
max_blocks = mddev->dev_sectors / 2; max_sectors = mddev->dev_sectors;
/* /*
* Should not happen. * Should not happen.
*/ */
if (!max_blocks) { if (!max_sectors) {
MD_BUG(); MD_BUG();
return; return;
} }
/* Pick 'scale' such that (resync>>scale)*1000 will fit /* Pick 'scale' such that (resync>>scale)*1000 will fit
* in a sector_t, and (max_blocks>>scale) will fit in a * in a sector_t, and (max_sectors>>scale) will fit in a
* u32, as those are the requirements for sector_div. * u32, as those are the requirements for sector_div.
* Thus 'scale' must be at least 10 * Thus 'scale' must be at least 10
*/ */
scale = 10; scale = 10;
if (sizeof(sector_t) > sizeof(unsigned long)) { if (sizeof(sector_t) > sizeof(unsigned long)) {
while ( max_blocks/2 > (1ULL<<(scale+32))) while ( max_sectors/2 > (1ULL<<(scale+32)))
scale++; scale++;
} }
res = (resync>>scale)*1000; res = (resync>>scale)*1000;
sector_div(res, (u32)((max_blocks>>scale)+1)); sector_div(res, (u32)((max_sectors>>scale)+1));
per_milli = res; per_milli = res;
{ {
...@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) ...@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
"resync" : "recovery"))), "resync" : "recovery"))),
per_milli/10, per_milli % 10, per_milli/10, per_milli % 10,
(unsigned long long) resync, (unsigned long long) resync/2,
(unsigned long long) max_blocks); (unsigned long long) max_sectors/2);
/* /*
* We do not want to overflow, so the order of operands and
* the * 100 / 100 trick are important. We do a +1 to be
* safe against division by zero. We only estimate anyway.
*
* dt: time from mark until now * dt: time from mark until now
* db: blocks written from mark until now * db: blocks written from mark until now
* rt: remaining time * rt: remaining time
*
* rt is a sector_t, so could be 32bit or 64bit.
* So we divide before multiply in case it is 32bit and close
* to the limit.
* We scale the divisor (db) by 32 to avoid losing precision
* near the end of resync when the number of remaining sectors
* is close to 'db'.
* We then divide rt by 32 after multiplying by db to compensate.
* The '+1' avoids division by zero if db is very small.
*/ */
dt = ((jiffies - mddev->resync_mark) / HZ); dt = ((jiffies - mddev->resync_mark) / HZ);
if (!dt) dt++; if (!dt) dt++;
db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
- mddev->resync_mark_cnt; - mddev->resync_mark_cnt;
rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); rt = max_sectors - resync; /* number of remaining sectors */
sector_div(rt, db/32+1);
rt *= dt;
rt >>= 5;
seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
((unsigned long)rt % 60)/6);
seq_printf(seq, " speed=%ldK/sec", db/2/dt); seq_printf(seq, " speed=%ldK/sec", db/2/dt);
} }
...@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v) ...@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
return 0; return 0;
} }
static struct seq_operations md_seq_ops = { static const struct seq_operations md_seq_ops = {
.start = md_seq_start, .start = md_seq_start,
.next = md_seq_next, .next = md_seq_next,
.stop = md_seq_stop, .stop = md_seq_stop,
......
...@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i ...@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
r10_bio->sector = sect; r10_bio->sector = sect;
raid10_find_phys(conf, r10_bio); raid10_find_phys(conf, r10_bio);
/* Need to check if this section will still be
/* Need to check if the array will still be
* degraded * degraded
*/ */
for (j=0; j<conf->copies;j++) { for (j=0; j<conf->raid_disks; j++)
int d = r10_bio->devs[j].devnum; if (conf->mirrors[j].rdev == NULL ||
if (conf->mirrors[d].rdev == NULL || test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
still_degraded = 1; still_degraded = 1;
break; break;
} }
}
must_sync = bitmap_start_sync(mddev->bitmap, sect, must_sync = bitmap_start_sync(mddev->bitmap, sect,
&sync_blocks, still_degraded); &sync_blocks, still_degraded);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment