Commit 4dbcdc75 authored by NeilBrown, committed by Linus Torvalds

[PATCH] md: count corrected read errors per drive

Store this total in the superblock (as appropriate), and make it available to
userspace via sysfs.
Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent d9d166c2
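
For illustration only (not part of the commit): a minimal userspace sketch of
how the new per-device "errors" attribute might be read once this patch is
applied. The array name "md0", the member name "dev-sda1", and the resulting
sysfs path are placeholder assumptions; the real path depends on your system.

/* read_errors_attr.c - hypothetical example, not from the patch */
#include <stdio.h>

int main(void)
{
	/* Placeholder path: /sys/block/<md device>/md/<member>/errors */
	const char *path = "/sys/block/md0/md/dev-sda1/errors";
	FILE *f = fopen(path, "r");
	unsigned long errors;

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%lu", &errors) != 1) {
		fprintf(stderr, "unexpected contents in %s\n", path);
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("corrected read errors on this member: %lu\n", errors);
	return 0;
}
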
@@ -222,6 +222,17 @@ Each directory contains:
 	       of being recoverred to
 	This list make grow in future.
+
+      errors
+	An approximate count of read errors that have been detected on
+	this device but have not caused the device to be evicted from
+	the array (either because they were corrected or because they
+	happened while the array was read-only).  When using version-1
+	metadata, this value persists across restarts of the array.
+
+	This value can be written while assembling an array thus
+	providing an ongoing count for arrays with metadata managed by
+	userspace.
+
 An active md device will also contain and entry for each active device
 in the array.  These are named

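The documentation added above notes that the count can be written while an
array is being assembled. A hypothetical sketch of a userspace metadata
manager seeding the kernel's counter from its own records follows; the sysfs
path and the stored count are assumptions, not values from the patch.

/* seed_errors_attr.c - hypothetical example, not from the patch */
#include <stdio.h>

/* Write a previously saved corrected-error count into the "errors" attribute. */
static int write_errors(const char *attr_path, unsigned long count)
{
	FILE *f = fopen(attr_path, "w");

	if (!f)
		return -1;
	fprintf(f, "%lu\n", count);	/* the attribute accepts a plain decimal number */
	return fclose(f);
}

int main(void)
{
	/* Placeholder path and count restored from externally managed metadata. */
	return write_errors("/sys/block/md0/md/dev-sda1/errors", 42) ? 1 : 0;
}
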
@@ -1000,6 +1000,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	}
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
+	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
 	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;

@@ -1139,6 +1140,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	else
 		sb->resync_offset = cpu_to_le64(0);
+	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
+
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);

@@ -1592,9 +1595,30 @@ super_show(mdk_rdev_t *rdev, char *page)
 }
 static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
+
+static ssize_t
+errors_show(mdk_rdev_t *rdev, char *page)
+{
+	return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
+}
+
+static ssize_t
+errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long n = simple_strtoul(buf, &e, 10);
+	if (*buf && (*e == 0 || *e == '\n')) {
+		atomic_set(&rdev->corrected_errors, n);
+		return len;
+	}
+	return -EINVAL;
+}
+static struct rdev_sysfs_entry rdev_errors =
+__ATTR(errors, 0644, errors_show, errors_store);
+
 static struct attribute *rdev_default_attrs[] = {
 	&rdev_state.attr,
 	&rdev_super.attr,
+	&rdev_errors.attr,
 	NULL,
 };

 static ssize_t

@@ -1674,6 +1698,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
 	rdev->data_offset = 0;
 	atomic_set(&rdev->nr_pending, 0);
 	atomic_set(&rdev->read_errors, 0);
+	atomic_set(&rdev->corrected_errors, 0);

 	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	if (!size) {

@@ -4729,7 +4754,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
 	int num = simple_strtoul(val, &e, 10);
 	if (*val && (*e == '\0' || *e == '\n')) {
 		start_readonly = num;
-		return 0;;
+		return 0;
 	}
 	return -EINVAL;
 }

@@ -1265,6 +1265,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 				if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 					continue;
 				rdev = conf->mirrors[d].rdev;
+				atomic_add(s, &rdev->corrected_errors);
 				if (sync_page_io(rdev->bdev,
 						 sect + rdev->data_offset,
 						 s<<9,

@@ -1463,6 +1464,7 @@ static void raid1d(mddev_t *mddev)
 					d = conf->raid_disks;
 				d--;
 				rdev = conf->mirrors[d].rdev;
+				atomic_add(s, &rdev->corrected_errors);
 				if (rdev &&
 				    test_bit(In_sync, &rdev->flags)) {
 					if (sync_page_io(rdev->bdev,

@@ -1122,9 +1122,13 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-	else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
-		md_error(r10_bio->mddev,
-			 conf->mirrors[d].rdev);
+	else {
+		atomic_add(r10_bio->sectors,
+			   &conf->mirrors[d].rdev->corrected_errors);
+		if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+			md_error(r10_bio->mddev,
+				 conf->mirrors[d].rdev);
+	}

 	/* for reconstruct, we always reschedule after a read.
 	 * for resync, only after all reads
@@ -1430,6 +1434,7 @@ static void raid10d(mddev_t *mddev)
 					sl--;
 					d = r10_bio->devs[sl].devnum;
 					rdev = conf->mirrors[d].rdev;
+					atomic_add(s, &rdev->corrected_errors);
 					if (rdev &&
 					    test_bit(In_sync, &rdev->flags)) {
 						if (sync_page_io(rdev->bdev,

@@ -1400,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh)
 			bi->bi_io_vec[0].bv_offset = 0;
 			bi->bi_size = STRIPE_SIZE;
 			bi->bi_next = NULL;
+			if (rw == WRITE &&
+			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			generic_make_request(bi);
 		} else {
 			if (rw == 1)

@@ -1562,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 			bi->bi_io_vec[0].bv_offset = 0;
 			bi->bi_size = STRIPE_SIZE;
 			bi->bi_next = NULL;
+			if (rw == WRITE &&
+			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			generic_make_request(bi);
 		} else {
 			if (rw == 1)

@@ -95,6 +95,10 @@ struct mdk_rdev_s
 	atomic_t	read_errors;	/* number of consecutive read errors that
 					 * we have tried to ignore.
 					 */
+	atomic_t	corrected_errors; /* number of corrected read errors,
+					   * for reporting to userspace and storing
+					   * in superblock.
+					   */
 };

 struct mddev_s