Commit d276bb29 authored by Jens Axboe

Merge tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.6/block

Pull MD updates from Song:

"1. Deprecate bitmap file support, by Christoph Hellwig;
 2. Fix deadlock with md sync thread, by Yu Kuai;
 3. Refactor md io accounting, by Yu Kuai;
 4. Various non-urgent fixes by Li Nan, Yu Kuai, and Jack Wang."

* tag 'md-next-20230729' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (36 commits)
  md/md-bitmap: hold 'reconfig_mutex' in backlog_store()
  md/md-bitmap: remove unnecessary local variable in backlog_store()
  md/raid10: use dereference_rdev_and_rrdev() to get devices
  md/raid10: factor out dereference_rdev_and_rrdev()
  md/raid10: check replacement and rdev to prevent submit the same io twice
  md/raid1: Avoid lock contention from wake_up()
  md: restore 'noio_flag' for the last mddev_resume()
  md: don't quiesce in mddev_suspend()
  md: remove redundant check in fix_read_error()
  md/raid10: optimize fix_read_error
  md/raid1: prioritize adding disk to 'removed' mirror
  md/md-faulty: enable io accounting
  md/md-linear: enable io accounting
  md/md-multipath: enable io accounting
  md/raid10: switch to use md_account_bio() for io accounting
  md/raid1: switch to use md_account_bio() for io accounting
  raid5: fix missing io accounting in raid5_align_endio()
  md: also clone new io if io accounting is disabled
  md: move initialization and destruction of 'io_acct_set' to md.c
  md: deprecate bitmap file support
  ...
parents 51d74ec9 44abfa6a
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
@@ -50,6 +50,16 @@ config MD_AUTODETECT
 
 	  If unsure, say Y.
 
+config MD_BITMAP_FILE
+	bool "MD bitmap file support (deprecated)"
+	default y
+	help
+	  If you say Y here, support for write intent bitmaps in files on an
+	  external file system is enabled.  This is an alternative to the internal
+	  bitmaps near the MD superblock, and very problematic code that abuses
+	  various kernel APIs and can only work with files on a file system not
+	  actually sitting on the MD device.
+
 config MD_LINEAR
 	tristate "Linear (append) mode (deprecated)"
 	depends on BLK_DEV_MD
...
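The new symbol gets used in two complementary ways later in this series: md-bitmap.c stubs out the file I/O helpers at compile time, while set_bitmap_file() in md.c refuses new bitmap files at run time. A minimal sketch of the two gating styles (helper names mirror the hunks below; this is an illustration, not part of the patch):

	#ifdef CONFIG_MD_BITMAP_FILE
	/* real implementation built only when the option is on */
	static void write_file_page(struct bitmap *bitmap, struct page *page, int wait);
	#else
	/* empty stub keeps callers compiling when the option is off */
	static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
	{
	}
	#endif

	/* runtime check, for code that is always built */
	if (!IS_ENABLED(CONFIG_MD_BITMAP_FILE))
		return -EINVAL;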
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
@@ -3725,7 +3725,6 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
 	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_unregister_thread(&mddev->sync_thread);
 			md_reap_sync_thread(mddev);
 		}
 	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
...
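Across this series the md_unregister_thread() call moves out of the callers and into md_reap_sync_thread() itself (see the md.c hunks below), so every reap site shrinks to the same two-step sketch:

	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	md_reap_sync_thread(mddev);	/* now unregisters ->sync_thread itself */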
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
@@ -139,29 +139,26 @@ static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page
  */
 
 /* IO operations when bitmap is stored near all superblocks */
+
+/* choose a good rdev and read the page from there */
 static int read_sb_page(struct mddev *mddev, loff_t offset,
-		struct page *page,
-		unsigned long index, int size)
+		struct page *page, unsigned long index, int size)
 {
-	/* choose a good rdev and read the page from there */
+	sector_t sector = mddev->bitmap_info.offset + offset +
+		index * (PAGE_SIZE / SECTOR_SIZE);
 	struct md_rdev *rdev;
-	sector_t target;
 
 	rdev_for_each(rdev, mddev) {
-		if (! test_bit(In_sync, &rdev->flags)
-		    || test_bit(Faulty, &rdev->flags)
-		    || test_bit(Bitmap_sync, &rdev->flags))
-			continue;
+		u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev));
 
-		target = offset + index * (PAGE_SIZE/512);
+		if (!test_bit(In_sync, &rdev->flags) ||
+		    test_bit(Faulty, &rdev->flags) ||
+		    test_bit(Bitmap_sync, &rdev->flags))
+			continue;
 
-		if (sync_page_io(rdev, target,
-				 roundup(size, bdev_logical_block_size(rdev->bdev)),
-				 page, REQ_OP_READ, true)) {
-			page->index = index;
+		if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true))
 			return 0;
-		}
 	}
 	return -EIO;
 }
@@ -225,18 +222,19 @@ static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
 }
 
 static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
-			   struct page *page)
+			   unsigned long pg_index, struct page *page)
 {
 	struct block_device *bdev;
 	struct mddev *mddev = bitmap->mddev;
 	struct bitmap_storage *store = &bitmap->storage;
 	loff_t sboff, offset = mddev->bitmap_info.offset;
-	sector_t ps, doff;
+	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
 	unsigned int size = PAGE_SIZE;
 	unsigned int opt_size = PAGE_SIZE;
+	sector_t doff;
 
 	bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
-	if (page->index == store->file_pages - 1) {
+	if (pg_index == store->file_pages - 1) {
 		unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
 
 		if (last_page_size == 0)
@@ -245,7 +243,6 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
 		opt_size = optimal_io_size(bdev, last_page_size, size);
 	}
 
-	ps = page->index * PAGE_SIZE / SECTOR_SIZE;
 	sboff = rdev->sb_start + offset;
 	doff = rdev->data_offset;
@@ -279,55 +276,41 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
 	return 0;
 }
 
-static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
+			  struct page *page, bool wait)
 {
-	struct md_rdev *rdev;
 	struct mddev *mddev = bitmap->mddev;
-	int ret;
 
 	do {
-		rdev = NULL;
+		struct md_rdev *rdev = NULL;
+
 		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
-			ret = __write_sb_page(rdev, bitmap, page);
-			if (ret)
-				return ret;
+			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
+				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
+				return;
+			}
 		}
 	} while (wait && md_super_wait(mddev) < 0);
-	return 0;
 }
 
 static void md_bitmap_file_kick(struct bitmap *bitmap);
-/*
- * write out a page to a file
- */
-static void write_page(struct bitmap *bitmap, struct page *page, int wait)
-{
-	struct buffer_head *bh;
 
-	if (bitmap->storage.file == NULL) {
-		switch (write_sb_page(bitmap, page, wait)) {
-		case -EINVAL:
-			set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
-		}
-	} else {
-		bh = page_buffers(page);
+#ifdef CONFIG_MD_BITMAP_FILE
+static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
+{
+	struct buffer_head *bh = page_buffers(page);
 
-		while (bh && bh->b_blocknr) {
-			atomic_inc(&bitmap->pending_writes);
-			set_buffer_locked(bh);
-			set_buffer_mapped(bh);
-			submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
-			bh = bh->b_this_page;
-		}
+	while (bh && bh->b_blocknr) {
+		atomic_inc(&bitmap->pending_writes);
+		set_buffer_locked(bh);
+		set_buffer_mapped(bh);
+		submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
+		bh = bh->b_this_page;
+	}
 
-		if (wait)
-			wait_event(bitmap->write_wait,
-				   atomic_read(&bitmap->pending_writes)==0);
-	}
-	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
-		md_bitmap_file_kick(bitmap);
+	if (wait)
+		wait_event(bitmap->write_wait,
+			   atomic_read(&bitmap->pending_writes) == 0);
 }
 
 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
@@ -364,10 +347,8 @@ static void free_buffers(struct page *page)
  * This usage is similar to how swap files are handled, and allows us
  * to write to a file with no concerns of memory allocation failing.
  */
-static int read_page(struct file *file, unsigned long index,
-		     struct bitmap *bitmap,
-		     unsigned long count,
-		     struct page *page)
+static int read_file_page(struct file *file, unsigned long index,
+		struct bitmap *bitmap, unsigned long count, struct page *page)
 {
 	int ret = 0;
 	struct inode *inode = file_inode(file);
@@ -415,7 +396,6 @@ static int read_page(struct file *file, unsigned long index,
 		blk_cur++;
 		bh = bh->b_this_page;
 	}
-	page->index = index;
 
 	wait_event(bitmap->write_wait,
 		   atomic_read(&bitmap->pending_writes)==0);
@@ -429,11 +409,45 @@ static int read_page(struct file *file, unsigned long index,
 			ret);
 	return ret;
 }
+#else /* CONFIG_MD_BITMAP_FILE */
+static void write_file_page(struct bitmap *bitmap, struct page *page, int wait)
+{
+}
+static int read_file_page(struct file *file, unsigned long index,
+		struct bitmap *bitmap, unsigned long count, struct page *page)
+{
+	return -EIO;
+}
+static void free_buffers(struct page *page)
+{
+	put_page(page);
+}
+#endif /* CONFIG_MD_BITMAP_FILE */
 
 /*
  * bitmap file superblock operations
  */
 
+/*
+ * write out a page to a file
+ */
+static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index,
+			       bool wait)
+{
+	struct bitmap_storage *store = &bitmap->storage;
+	struct page *page = store->filemap[pg_index];
+
+	if (mddev_is_clustered(bitmap->mddev)) {
+		pg_index += bitmap->cluster_slot *
+			DIV_ROUND_UP(store->bytes, PAGE_SIZE);
+	}
+
+	if (store->file)
+		write_file_page(bitmap, page, wait);
+	else
+		write_sb_page(bitmap, pg_index, page, wait);
+}
+
 /*
  * md_bitmap_wait_writes() should be called before writing any bitmap
  * blocks, to ensure previous writes, particularly from
@@ -488,7 +502,12 @@ void md_bitmap_update_sb(struct bitmap *bitmap)
 	sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
 					   bitmap_info.space);
 	kunmap_atomic(sb);
-	write_page(bitmap, bitmap->storage.sb_page, 1);
+
+	if (bitmap->storage.file)
+		write_file_page(bitmap, bitmap->storage.sb_page, 1);
+	else
+		write_sb_page(bitmap, bitmap->storage.sb_index,
+			      bitmap->storage.sb_page, 1);
 }
 EXPORT_SYMBOL(md_bitmap_update_sb);
@@ -540,7 +559,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
 	bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (bitmap->storage.sb_page == NULL)
 		return -ENOMEM;
-	bitmap->storage.sb_page->index = 0;
+	bitmap->storage.sb_index = 0;
 
 	sb = kmap_atomic(bitmap->storage.sb_page);
@@ -601,7 +620,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 	unsigned long sectors_reserved = 0;
 	int err = -EINVAL;
 	struct page *sb_page;
-	loff_t offset = bitmap->mddev->bitmap_info.offset;
+	loff_t offset = 0;
 
 	if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
 		chunksize = 128 * 1024 * 1024;
@@ -628,7 +647,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
 		/* to 4k blocks */
 		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
-		offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
+		offset = bitmap->cluster_slot * (bm_blocks << 3);
 		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
 			bitmap->cluster_slot, offset);
 	}
@@ -637,13 +656,11 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
 		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
 
-		err = read_page(bitmap->storage.file, 0,
-				bitmap, bytes, sb_page);
+		err = read_file_page(bitmap->storage.file, 0,
+				bitmap, bytes, sb_page);
 	} else {
-		err = read_sb_page(bitmap->mddev,
-				   offset,
-				   sb_page,
-				   0, sizeof(bitmap_super_t));
+		err = read_sb_page(bitmap->mddev, offset, sb_page, 0,
+				   sizeof(bitmap_super_t));
 	}
 	if (err)
 		return err;
@@ -819,7 +836,7 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
 	if (store->sb_page) {
 		store->filemap[0] = store->sb_page;
 		pnum = 1;
-		store->sb_page->index = offset;
+		store->sb_index = offset;
 	}
 
 	for ( ; pnum < num_pages; pnum++) {
@@ -828,7 +845,6 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
 			store->file_pages = pnum;
 			return -ENOMEM;
 		}
-		store->filemap[pnum]->index = pnum + offset;
 	}
 	store->file_pages = pnum;
@@ -847,14 +863,10 @@ static int md_bitmap_storage_alloc(struct bitmap_storage *store,
 
 static void md_bitmap_file_unmap(struct bitmap_storage *store)
 {
-	struct page **map, *sb_page;
-	int pages;
-	struct file *file;
-
-	file = store->file;
-	map = store->filemap;
-	pages = store->file_pages;
-	sb_page = store->sb_page;
+	struct file *file = store->file;
+	struct page *sb_page = store->sb_page;
+	struct page **map = store->filemap;
+	int pages = store->file_pages;
 
 	while (pages--)
 		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
@@ -879,21 +891,13 @@ static void md_bitmap_file_unmap(struct bitmap_storage *store)
  */
 static void md_bitmap_file_kick(struct bitmap *bitmap)
 {
-	char *path, *ptr = NULL;
-
 	if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
 		md_bitmap_update_sb(bitmap);
 
 		if (bitmap->storage.file) {
-			path = kmalloc(PAGE_SIZE, GFP_KERNEL);
-			if (path)
-				ptr = file_path(bitmap->storage.file,
-					     path, PAGE_SIZE);
-
-			pr_warn("%s: kicking failed bitmap file %s from array!\n",
-				bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
-
-			kfree(path);
+			pr_warn("%s: kicking failed bitmap file %pD4 from array!\n",
+				bmname(bitmap), bitmap->storage.file);
 		} else
 			pr_warn("%s: disabling internal bitmap due to errors\n",
 				bmname(bitmap));
@@ -945,6 +949,7 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 	void *kaddr;
 	unsigned long chunk = block >> bitmap->counts.chunkshift;
 	struct bitmap_storage *store = &bitmap->storage;
+	unsigned long index = file_page_index(store, chunk);
 	unsigned long node_offset = 0;
 
 	if (mddev_is_clustered(bitmap->mddev))
@@ -962,9 +967,9 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 	else
 		set_bit_le(bit, kaddr);
 	kunmap_atomic(kaddr);
-	pr_debug("set file bit %lu page %lu\n", bit, page->index);
+	pr_debug("set file bit %lu page %lu\n", bit, index);
 	/* record page number so it gets flushed to disk when unplug occurs */
-	set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
+	set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
 }
 
 static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
@@ -974,6 +979,7 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
 	void *paddr;
 	unsigned long chunk = block >> bitmap->counts.chunkshift;
 	struct bitmap_storage *store = &bitmap->storage;
+	unsigned long index = file_page_index(store, chunk);
 	unsigned long node_offset = 0;
 
 	if (mddev_is_clustered(bitmap->mddev))
@@ -989,8 +995,8 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
 	else
 		clear_bit_le(bit, paddr);
 	kunmap_atomic(paddr);
-	if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
-		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
+	if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
+		set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
 		bitmap->allclean = 0;
 	}
 }
@@ -1042,7 +1048,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
 					   "md bitmap_unplug");
 			}
 			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
-			write_page(bitmap, bitmap->storage.filemap[i], 0);
+			filemap_write_page(bitmap, i, false);
 			writing = 1;
 		}
 	}
@@ -1084,33 +1090,31 @@ void md_bitmap_unplug_async(struct bitmap *bitmap)
 EXPORT_SYMBOL(md_bitmap_unplug_async);
 
 static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
-/*
- * bitmap_init_from_disk -- called at bitmap_create time to initialize
- * the in-memory bitmap from the on-disk bitmap -- also, sets up the
- * memory mapping of the bitmap file
- * Special cases:
- *   if there's no bitmap file, or if the bitmap file had been
- *   previously kicked from the array, we mark all the bits as
- *   1's in order to cause a full resync.
+
+/*
+ * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory
+ * mapping of the bitmap file.
+ *
+ * Special case: If there's no bitmap file, or if the bitmap file had been
+ * previously kicked from the array, we mark all the bits as 1's in order to
+ * cause a full resync.
  *
  * We ignore all bits for sectors that end earlier than 'start'.
- * This is used when reading an out-of-date bitmap...
+ * This is used when reading an out-of-date bitmap.
  */
 static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 {
-	unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
-	struct page *page = NULL;
-	unsigned long bit_cnt = 0;
-	struct file *file;
-	unsigned long offset;
-	int outofdate;
-	int ret = -ENOSPC;
-	void *paddr;
+	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
+	struct mddev *mddev = bitmap->mddev;
+	unsigned long chunks = bitmap->counts.chunks;
 	struct bitmap_storage *store = &bitmap->storage;
+	struct file *file = store->file;
+	unsigned long node_offset = 0;
+	unsigned long bit_cnt = 0;
+	unsigned long i;
+	int ret;
 
-	chunks = bitmap->counts.chunks;
-	file = store->file;
-
-	if (!file && !bitmap->mddev->bitmap_info.offset) {
+	if (!file && !mddev->bitmap_info.offset) {
 		/* No permanent bitmap - fill with '1s'. */
 		store->filemap = NULL;
 		store->file_pages = 0;
@@ -1125,77 +1129,79 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 		return 0;
 	}
 
-	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
-	if (outofdate)
-		pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));
-
 	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
 		pr_warn("%s: bitmap file too short %lu < %lu\n",
 			bmname(bitmap),
 			(unsigned long) i_size_read(file->f_mapping->host),
 			store->bytes);
+		ret = -ENOSPC;
 		goto err;
 	}
 
-	oldindex = ~0L;
-	offset = 0;
-	if (!bitmap->mddev->bitmap_info.external)
-		offset = sizeof(bitmap_super_t);
-
-	if (mddev_is_clustered(bitmap->mddev))
+	if (mddev_is_clustered(mddev))
 		node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
 
-	for (i = 0; i < chunks; i++) {
-		int b;
-		index = file_page_index(&bitmap->storage, i);
-		bit = file_page_offset(&bitmap->storage, i);
-		if (index != oldindex) { /* this is a new page, read it in */
-			int count;
-			/* unmap the old page, we're done with it */
-			if (index == store->file_pages-1)
-				count = store->bytes - index * PAGE_SIZE;
-			else
-				count = PAGE_SIZE;
-			page = store->filemap[index];
-			if (file)
-				ret = read_page(file, index, bitmap,
-						count, page);
-			else
-				ret = read_sb_page(
-					bitmap->mddev,
-					bitmap->mddev->bitmap_info.offset,
-					page,
-					index + node_offset, count);
-
-			if (ret)
-				goto err;
+	for (i = 0; i < store->file_pages; i++) {
+		struct page *page = store->filemap[i];
+		int count;
 
-			oldindex = index;
+		/* unmap the old page, we're done with it */
+		if (i == store->file_pages - 1)
+			count = store->bytes - i * PAGE_SIZE;
+		else
+			count = PAGE_SIZE;
+
+		if (file)
+			ret = read_file_page(file, i, bitmap, count, page);
+		else
+			ret = read_sb_page(mddev, 0, page, i + node_offset,
+					   count);
+		if (ret)
+			goto err;
+	}
 
-		if (outofdate) {
-			/*
-			 * if bitmap is out of date, dirty the
-			 * whole page and write it out
-			 */
-			paddr = kmap_atomic(page);
-			memset(paddr + offset, 0xff,
-			       PAGE_SIZE - offset);
-			kunmap_atomic(paddr);
-			write_page(bitmap, page, 1);
+	if (outofdate) {
+		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
+			bmname(bitmap));
 
-			ret = -EIO;
-			if (test_bit(BITMAP_WRITE_ERROR,
-				     &bitmap->flags))
-				goto err;
+		for (i = 0; i < store->file_pages; i++) {
+			struct page *page = store->filemap[i];
+			unsigned long offset = 0;
+			void *paddr;
+
+			if (i == 0 && !mddev->bitmap_info.external)
+				offset = sizeof(bitmap_super_t);
+
+			/*
+			 * If the bitmap is out of date, dirty the whole page
+			 * and write it out
+			 */
+			paddr = kmap_atomic(page);
+			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
+			kunmap_atomic(paddr);
+
+			filemap_write_page(bitmap, i, true);
+			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
+				ret = -EIO;
+				goto err;
 			}
 		}
+	}
+
+	for (i = 0; i < chunks; i++) {
+		struct page *page = filemap_get_page(&bitmap->storage, i);
+		unsigned long bit = file_page_offset(&bitmap->storage, i);
+		void *paddr;
+		bool was_set;
+
 		paddr = kmap_atomic(page);
 		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
-			b = test_bit(bit, paddr);
+			was_set = test_bit(bit, paddr);
 		else
-			b = test_bit_le(bit, paddr);
+			was_set = test_bit_le(bit, paddr);
 		kunmap_atomic(paddr);
-		if (b) {
+
+		if (was_set) {
 			/* if the disk bit is set, set the memory bit */
 			int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
 				      >= start);
@@ -1204,7 +1210,6 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 				  needed);
 			bit_cnt++;
 		}
-		offset = 0;
 	}
 
 	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
@@ -1396,9 +1401,8 @@ void md_bitmap_daemon_work(struct mddev *mddev)
 			break;
 		if (bitmap->storage.filemap &&
 		    test_and_clear_page_attr(bitmap, j,
-					     BITMAP_PAGE_NEEDWRITE)) {
-			write_page(bitmap, bitmap->storage.filemap[j], 0);
-		}
+					     BITMAP_PAGE_NEEDWRITE))
+			filemap_write_page(bitmap, j, false);
 	}
 
  done:
@@ -2542,6 +2546,10 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
 	if (backlog > COUNTER_MAX)
 		return -EINVAL;
 
+	rv = mddev_lock(mddev);
+	if (rv)
+		return rv;
+
 	/*
 	 * Without write mostly device, it doesn't make sense to set
 	 * backlog for max_write_behind.
@@ -2555,6 +2563,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
 	if (!has_write_mostly) {
 		pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n",
 				    mdname(mddev));
+		mddev_unlock(mddev);
 		return -EINVAL;
 	}
 
@@ -2565,13 +2574,13 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev_destroy_serial_pool(mddev, NULL, false);
 	} else if (backlog && !mddev->serial_info_pool) {
 		/* serial_info_pool is needed since backlog is not zero */
-		struct md_rdev *rdev;
-
 		rdev_for_each(rdev, mddev)
 			mddev_create_serial_pool(mddev, rdev, false);
 	}
 	if (old_mwb != backlog)
 		md_bitmap_update_sb(mddev->bitmap);
+
+	mddev_unlock(mddev);
 	return len;
 }
...
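The backlog_store() hunks above show the shape to note in any sysfs store method: every return taken after a successful mddev_lock() must pair with mddev_unlock(). A condensed sketch of that pattern (some_store() is illustrative, not from the patch; the bitmap check merely stands in for a real precondition):

	static ssize_t some_store(struct mddev *mddev, const char *buf, size_t len)
	{
		int rv = mddev_lock(mddev);	/* may fail, e.g. on signal */

		if (rv)
			return rv;
		if (!mddev->bitmap) {		/* hypothetical precondition */
			mddev_unlock(mddev);	/* unlock on the error path too */
			return -EINVAL;
		}
		/* ... reconfigure under reconfig_mutex ... */
		mddev_unlock(mddev);
		return len;
	}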
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
@@ -201,6 +201,7 @@ struct bitmap {
 		struct file *file;		/* backing disk file */
 		struct page *sb_page;		/* cached copy of the bitmap
 						 * file superblock */
+		unsigned long sb_index;
 		struct page **filemap;		/* list of cache pages for
 						 * the file */
 		unsigned long *filemap_attr;	/* attributes associated
...
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
@@ -204,6 +204,8 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
 			failit = 1;
 		}
 	}
+
+	md_account_bio(mddev, &bio);
 	if (failit) {
 		struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
 						&mddev->bio_set);
...
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
@@ -238,6 +238,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		bio = split;
 	}
 
+	md_account_bio(mddev, &bio);
 	bio_set_dev(bio, tmp_dev->rdev->bdev);
 	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
 		start_sector + data_offset;
...
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
@@ -107,6 +107,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
 	    && md_flush_request(mddev, bio))
 		return true;
 
+	md_account_bio(mddev, &bio);
 	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
 
 	mp_bh->master_bio = bio;
...
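The conversion is identical in md-faulty, md-linear and md-multipath: once the bio has been split or routed, a single call enables accounting. A simplified sketch of the shared pattern (demo_make_request() and the rdev argument are illustrative, not from the patch):

	static bool demo_make_request(struct mddev *mddev, struct bio *bio,
				      struct md_rdev *rdev)
	{
		/* Account the bio; *bio is replaced by a clone whose endio
		 * completes the original and finishes the accounting. */
		md_account_bio(mddev, &bio);

		bio_set_dev(bio, rdev->bdev);
		submit_bio_noacct(bio);
		return true;
	}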
diff --git a/drivers/md/md.c b/drivers/md/md.c
@@ -453,7 +453,6 @@ void mddev_suspend(struct mddev *mddev)
 		mddev->pers->prepare_suspend(mddev);
 
 	wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
-	mddev->pers->quiesce(mddev, 1);
 
 	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
 	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
@@ -465,14 +464,15 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
-	/* entred the memalloc scope from mddev_suspend() */
-	memalloc_noio_restore(mddev->noio_flag);
-
 	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (--mddev->suspended)
 		return;
+
+	/* entred the memalloc scope from mddev_suspend() */
+	memalloc_noio_restore(mddev->noio_flag);
+
 	percpu_ref_resurrect(&mddev->active_io);
 	wake_up(&mddev->sb_wait);
-	mddev->pers->quiesce(mddev, 0);
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
@@ -643,6 +643,7 @@ void mddev_init(struct mddev *mddev)
 {
 	mutex_init(&mddev->open_mutex);
 	mutex_init(&mddev->reconfig_mutex);
+	mutex_init(&mddev->sync_mutex);
 	mutex_init(&mddev->bitmap_info.mutex);
 	INIT_LIST_HEAD(&mddev->disks);
 	INIT_LIST_HEAD(&mddev->all_mddevs);
@@ -650,6 +651,7 @@ void mddev_init(struct mddev *mddev)
 	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
 	atomic_set(&mddev->active, 1);
 	atomic_set(&mddev->openers, 0);
+	atomic_set(&mddev->sync_seq, 0);
 	spin_lock_init(&mddev->lock);
 	atomic_set(&mddev->flush_pending, 0);
 	init_waitqueue_head(&mddev->sb_wait);
@@ -2304,7 +2306,7 @@ int md_integrity_register(struct mddev *mddev)
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
 	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
 	    (mddev->level != 1 && mddev->level != 10 &&
-	     bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
+	     bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
 		/*
 		 * No need to handle the failure of bioset_integrity_create,
 		 * because the function is called by md_run() -> pers->run(),
@@ -4747,6 +4749,62 @@ action_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
+static void stop_sync_thread(struct mddev *mddev)
+{
+	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+		return;
+
+	if (mddev_lock(mddev))
+		return;
+
+	/*
+	 * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
+	 * held.
+	 */
+	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+		mddev_unlock(mddev);
+		return;
+	}
+
+	if (work_pending(&mddev->del_work))
+		flush_workqueue(md_misc_wq);
+
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+	/*
+	 * Thread might be blocked waiting for metadata update which will now
+	 * never happen
+	 */
+	md_wakeup_thread_directly(mddev->sync_thread);
+
+	mddev_unlock(mddev);
+}
+
+static void idle_sync_thread(struct mddev *mddev)
+{
+	int sync_seq = atomic_read(&mddev->sync_seq);
+
+	mutex_lock(&mddev->sync_mutex);
+	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	stop_sync_thread(mddev);
+
+	wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
+			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+
+	mutex_unlock(&mddev->sync_mutex);
+}
+
+static void frozen_sync_thread(struct mddev *mddev)
+{
+	mutex_lock(&mddev->sync_mutex);
+	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	stop_sync_thread(mddev);
+
+	wait_event(resync_wait, mddev->sync_thread == NULL &&
+			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+
+	mutex_unlock(&mddev->sync_mutex);
+}
+
 static ssize_t
 action_store(struct mddev *mddev, const char *page, size_t len)
 {
@@ -4754,35 +4812,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 		return -EINVAL;
 
-	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
-		if (cmd_match(page, "frozen"))
-			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-		else
-			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
-		    mddev_lock(mddev) == 0) {
-			if (work_pending(&mddev->del_work))
-				flush_workqueue(md_misc_wq);
-			if (mddev->sync_thread) {
-				sector_t save_rp = mddev->reshape_position;
-
-				mddev_unlock(mddev);
-				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-				md_unregister_thread(&mddev->sync_thread);
-				mddev_lock_nointr(mddev);
-				/*
-				 * set RECOVERY_INTR again and restore reshape
-				 * position in case others changed them after
-				 * got lock, eg, reshape_position_store and
-				 * md_check_recovery.
-				 */
-				mddev->reshape_position = save_rp;
-				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-				md_reap_sync_thread(mddev);
-			}
-			mddev_unlock(mddev);
-		}
-	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+	if (cmd_match(page, "idle"))
+		idle_sync_thread(mddev);
+	else if (cmd_match(page, "frozen"))
+		frozen_sync_thread(mddev);
+	else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return -EBUSY;
 	else if (cmd_match(page, "resync"))
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -5842,6 +5876,13 @@ int md_run(struct mddev *mddev)
 			goto exit_bio_set;
 	}
 
+	if (!bioset_initialized(&mddev->io_clone_set)) {
+		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
+				  offsetof(struct md_io_clone, bio_clone), 0);
+		if (err)
+			goto exit_sync_set;
+	}
+
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
 	if (!pers || !try_module_get(pers->owner)) {
@@ -6019,6 +6060,8 @@ int md_run(struct mddev *mddev)
 	module_put(pers->owner);
 	md_bitmap_destroy(mddev);
 abort:
+	bioset_exit(&mddev->io_clone_set);
+exit_sync_set:
 	bioset_exit(&mddev->sync_set);
 exit_bio_set:
 	bioset_exit(&mddev->bio_set);
@@ -6176,7 +6219,6 @@ static void __md_stop_writes(struct mddev *mddev)
 	flush_workqueue(md_misc_wq);
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_unregister_thread(&mddev->sync_thread);
 		md_reap_sync_thread(mddev);
 	}
 
@@ -6243,6 +6285,7 @@ static void __md_stop(struct mddev *mddev)
 	percpu_ref_exit(&mddev->active_io);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_clone_set);
 }
 
 void md_stop(struct mddev *mddev)
@@ -7010,6 +7053,15 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 		if (mddev->bitmap || mddev->bitmap_info.file)
 			return -EEXIST; /* cannot add when bitmap is present */
 
+		if (!IS_ENABLED(CONFIG_MD_BITMAP_FILE)) {
+			pr_warn("%s: bitmap files not supported by this kernel\n",
+				mdname(mddev));
+			return -EINVAL;
+		}
+		pr_warn("%s: using deprecated bitmap file support\n",
+			mdname(mddev));
+
 		f = fget(fd);
 
 		if (f == NULL) {
@@ -8599,62 +8651,44 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 }
 EXPORT_SYMBOL_GPL(md_submit_discard_bio);
 
-int acct_bioset_init(struct mddev *mddev)
-{
-	int err = 0;
-
-	if (!bioset_initialized(&mddev->io_acct_set))
-		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
-			offsetof(struct md_io_acct, bio_clone), 0);
-	return err;
-}
-EXPORT_SYMBOL_GPL(acct_bioset_init);
-
-void acct_bioset_exit(struct mddev *mddev)
-{
-	bioset_exit(&mddev->io_acct_set);
-}
-EXPORT_SYMBOL_GPL(acct_bioset_exit);
-
-static void md_end_io_acct(struct bio *bio)
+static void md_end_clone_io(struct bio *bio)
 {
-	struct md_io_acct *md_io_acct = bio->bi_private;
-	struct bio *orig_bio = md_io_acct->orig_bio;
-	struct mddev *mddev = md_io_acct->mddev;
+	struct md_io_clone *md_io_clone = bio->bi_private;
+	struct bio *orig_bio = md_io_clone->orig_bio;
+	struct mddev *mddev = md_io_clone->mddev;
 
 	orig_bio->bi_status = bio->bi_status;
 
-	bio_end_io_acct(orig_bio, md_io_acct->start_time);
+	if (md_io_clone->start_time)
+		bio_end_io_acct(orig_bio, md_io_clone->start_time);
+
 	bio_put(bio);
 	bio_endio(orig_bio);
 	percpu_ref_put(&mddev->active_io);
 }
 
-/*
- * Used by personalities that don't already clone the bio and thus can't
- * easily add the timestamp to their extended bio structure.
- */
-void md_account_bio(struct mddev *mddev, struct bio **bio)
+static void md_clone_bio(struct mddev *mddev, struct bio **bio)
 {
 	struct block_device *bdev = (*bio)->bi_bdev;
-	struct md_io_acct *md_io_acct;
-	struct bio *clone;
-
-	if (!blk_queue_io_stat(bdev->bd_disk->queue))
-		return;
+	struct md_io_clone *md_io_clone;
+	struct bio *clone =
+		bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
+
+	md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
+	md_io_clone->orig_bio = *bio;
+	md_io_clone->mddev = mddev;
+	if (blk_queue_io_stat(bdev->bd_disk->queue))
+		md_io_clone->start_time = bio_start_io_acct(*bio);
+
+	clone->bi_end_io = md_end_clone_io;
+	clone->bi_private = md_io_clone;
+	*bio = clone;
+}
 
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
 	percpu_ref_get(&mddev->active_io);
-
-	clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
-	md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
-	md_io_acct->orig_bio = *bio;
-	md_io_acct->start_time = bio_start_io_acct(*bio);
-	md_io_acct->mddev = mddev;
-
-	clone->bi_end_io = md_end_io_acct;
-	clone->bi_private = md_io_acct;
-	*bio = clone;
+	md_clone_bio(mddev, bio);
 }
 EXPORT_SYMBOL_GPL(md_account_bio);
@@ -9327,7 +9361,6 @@ void md_check_recovery(struct mddev *mddev)
 			 * ->spare_active and clear saved_raid_disk
 			 */
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_unregister_thread(&mddev->sync_thread);
 			md_reap_sync_thread(mddev);
 			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9356,17 +9389,24 @@ void md_check_recovery(struct mddev *mddev)
 		if (mddev->sb_flags)
 			md_update_sb(mddev, 0);
 
-		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
-		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
-			/* resync/recovery still happening */
-			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			goto unlock;
-		}
-		if (mddev->sync_thread) {
-			md_unregister_thread(&mddev->sync_thread);
+		/*
+		 * Never start a new sync thread if MD_RECOVERY_RUNNING is
+		 * still set.
+		 */
+		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+			if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
+				/* resync/recovery still happening */
+				clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+				goto unlock;
+			}
+
+			if (WARN_ON_ONCE(!mddev->sync_thread))
+				goto unlock;
+
 			md_reap_sync_thread(mddev);
 			goto unlock;
 		}
+
 		/* Set RUNNING before clearing NEEDED to avoid
 		 * any transients in the value of "sync_action".
 		 */
@@ -9443,7 +9483,10 @@ void md_reap_sync_thread(struct mddev *mddev)
 	sector_t old_dev_sectors = mddev->dev_sectors;
 	bool is_reshaped = false;
 
-	/* sync_thread should be unregistered, collect result */
+	/* resync has finished, collect result */
+	md_unregister_thread(&mddev->sync_thread);
+	atomic_inc(&mddev->sync_seq);
+
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    mddev->degraded != mddev->raid_disks) {
@@ -9488,7 +9531,6 @@ void md_reap_sync_thread(struct mddev *mddev)
 	if (mddev_is_clustered(mddev) && is_reshaped
 				      && !test_bit(MD_CLOSING, &mddev->flags))
 		md_cluster_ops->update_size(mddev, old_dev_sectors);
-	wake_up(&resync_wait);
 	/* flag recovery needed just to double check */
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -9496,6 +9538,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 	md_new_event();
 	if (mddev->event_work.func)
 		queue_work(md_misc_wq, &mddev->event_work);
+	wake_up(&resync_wait);
 }
 EXPORT_SYMBOL(md_reap_sync_thread);
...
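The sync_seq counter is what makes "echo idle" race-free without holding reconfig_mutex across the wait. A commented fragment sketching the ordering (all names are from the hunks above):

	int sync_seq = atomic_read(&mddev->sync_seq);	/* snapshot first */

	stop_sync_thread(mddev);	/* sets MD_RECOVERY_INTR, then drops the lock */

	/*
	 * md_reap_sync_thread() increments sync_seq and wakes resync_wait,
	 * so either the counter moved past our snapshot (a reap happened
	 * after it) or no sync thread is running at all.  Waiting without
	 * reconfig_mutex is what breaks the old deadlock against
	 * md_check_recovery().
	 */
	wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
		   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));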
diff --git a/drivers/md/md.h b/drivers/md/md.h
@@ -510,7 +510,7 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
-	struct bio_set			io_acct_set; /* for raid0 and raid5 io accounting */
+	struct bio_set			io_clone_set;
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
@@ -535,6 +535,11 @@ struct mddev {
 	 */
 	struct list_head		deleting;
 
+	/* Used to synchronize idle and frozen for action_store() */
+	struct mutex			sync_mutex;
+	/* The sequence number for sync thread */
+	atomic_t sync_seq;
+
 	bool	has_superblocks:1;
 	bool	fail_last_dev:1;
 	bool	serialize_policy:1;
@@ -731,7 +736,7 @@ struct md_thread {
 	void			*private;
 };
 
-struct md_io_acct {
+struct md_io_clone {
 	struct mddev	*mddev;
 	struct bio	*orig_bio;
 	unsigned long	start_time;
@@ -769,8 +774,6 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
-int acct_bioset_init(struct mddev *mddev);
-void acct_bioset_exit(struct mddev *mddev);
 void md_account_bio(struct mddev *mddev, struct bio **bio);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
...
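struct md_io_clone embeds the clone bio as its tail member, and md_run() initializes io_clone_set with front_pad = offsetof(struct md_io_clone, bio_clone), so each bio allocated from that set is really the tail of a whole md_io_clone. A sketch of why container_of() recovers the bookkeeping from the clone (field layout as in the hunk above; assumes bio_clone is the struct's final member, which the offsetof() in md_run() implies):

	struct bio *clone = bio_alloc_clone(bdev, *bio, GFP_NOIO,
					    &mddev->io_clone_set);
	/* front_pad reserved room for the leading fields, so the
	 * allocation holds a full struct md_io_clone around the bio */
	struct md_io_clone *ic =
		container_of(clone, struct md_io_clone, bio_clone);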
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
@@ -377,7 +377,6 @@ static void raid0_free(struct mddev *mddev, void *priv)
 	struct r0conf *conf = priv;
 
 	free_conf(mddev, conf);
-	acct_bioset_exit(mddev);
 }
 
 static int raid0_run(struct mddev *mddev)
@@ -392,16 +391,11 @@ static int raid0_run(struct mddev *mddev)
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
 
-	if (acct_bioset_init(mddev)) {
-		pr_err("md/raid0:%s: alloc acct bioset failed.\n", mdname(mddev));
-		return -ENOMEM;
-	}
-
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
 		ret = create_strip_zones(mddev, &conf);
 		if (ret < 0)
-			goto exit_acct_set;
+			return ret;
 		mddev->private = conf;
 	}
 	conf = mddev->private;
@@ -432,15 +426,9 @@ static int raid0_run(struct mddev *mddev)
 	ret = md_integrity_register(mddev);
 	if (ret)
-		goto free;
+		free_conf(mddev, conf);
 
 	return ret;
-
-free:
-	free_conf(mddev, conf);
-exit_acct_set:
-	acct_bioset_exit(mddev);
-	return ret;
 }
 
 /*
...
...@@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio) ...@@ -304,8 +304,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
bio_end_io_acct(bio, r1_bio->start_time);
bio_endio(bio); bio_endio(bio);
} }
...@@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect ...@@ -791,11 +789,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
return best_disk; return best_disk;
} }
static void wake_up_barrier(struct r1conf *conf)
{
if (wq_has_sleeper(&conf->wait_barrier))
wake_up(&conf->wait_barrier);
}
static void flush_bio_list(struct r1conf *conf, struct bio *bio) static void flush_bio_list(struct r1conf *conf, struct bio *bio)
{ {
/* flush any pending bitmap writes to disk before proceeding w/ I/O */ /* flush any pending bitmap writes to disk before proceeding w/ I/O */
raid1_prepare_flush_writes(conf->mddev->bitmap); raid1_prepare_flush_writes(conf->mddev->bitmap);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
while (bio) { /* submit pending writes */ while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next; struct bio *next = bio->bi_next;
...@@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait) ...@@ -972,7 +976,7 @@ static bool _wait_barrier(struct r1conf *conf, int idx, bool nowait)
* In case freeze_array() is waiting for * In case freeze_array() is waiting for
* get_unqueued_pending() == extra * get_unqueued_pending() == extra
*/ */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
/* Wait for the barrier in same barrier unit bucket to drop. */ /* Wait for the barrier in same barrier unit bucket to drop. */
/* Return false when nowait flag is set */ /* Return false when nowait flag is set */
...@@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa ...@@ -1015,7 +1019,7 @@ static bool wait_read_barrier(struct r1conf *conf, sector_t sector_nr, bool nowa
* In case freeze_array() is waiting for * In case freeze_array() is waiting for
* get_unqueued_pending() == extra * get_unqueued_pending() == extra
*/ */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
/* Wait for array to be unfrozen */ /* Wait for array to be unfrozen */
/* Return false when nowait flag is set */ /* Return false when nowait flag is set */
...@@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait) ...@@ -1044,7 +1048,7 @@ static bool wait_barrier(struct r1conf *conf, sector_t sector_nr, bool nowait)
static void _allow_barrier(struct r1conf *conf, int idx) static void _allow_barrier(struct r1conf *conf, int idx)
{ {
atomic_dec(&conf->nr_pending[idx]); atomic_dec(&conf->nr_pending[idx]);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
} }
static void allow_barrier(struct r1conf *conf, sector_t sector_nr) static void allow_barrier(struct r1conf *conf, sector_t sector_nr)
...@@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) ...@@ -1173,7 +1177,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending); bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
kfree(plug); kfree(plug);
return; return;
...@@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1303,10 +1307,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
} }
r1_bio->read_disk = rdisk; r1_bio->read_disk = rdisk;
if (!r1bio_existed) {
if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r1_bio->start_time = bio_start_io_acct(bio); r1_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
&mddev->bio_set); &mddev->bio_set);
...@@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1500,8 +1504,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->sectors = max_sectors; r1_bio->sectors = max_sectors;
} }
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r1_bio->start_time = bio_start_io_acct(bio); r1_bio->master_bio = bio;
atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0); atomic_set(&r1_bio->behind_remaining, 0);
...@@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1576,7 +1580,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio_write_done(r1_bio); r1_bio_write_done(r1_bio);
/* In case raid1d snuck in to freeze_array */ /* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier); wake_up_barrier(conf);
} }
static bool raid1_make_request(struct mddev *mddev, struct bio *bio) static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
...@@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1766,7 +1770,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{ {
struct r1conf *conf = mddev->private; struct r1conf *conf = mddev->private;
int err = -EEXIST; int err = -EEXIST;
int mirror = 0; int mirror = 0, repl_slot = -1;
struct raid1_info *p; struct raid1_info *p;
int first = 0; int first = 0;
int last = conf->raid_disks - 1; int last = conf->raid_disks - 1;
...@@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -1809,17 +1813,21 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break; break;
} }
if (test_bit(WantReplacement, &p->rdev->flags) && if (test_bit(WantReplacement, &p->rdev->flags) &&
p[conf->raid_disks].rdev == NULL) { p[conf->raid_disks].rdev == NULL && repl_slot < 0)
/* Add this device as a replacement */ repl_slot = mirror;
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
rdev->raid_disk = mirror;
err = 0;
conf->fullsync = 1;
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
break;
}
} }
if (err && repl_slot >= 0) {
/* Add this device as a replacement */
p = conf->mirrors + repl_slot;
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
rdev->raid_disk = repl_slot;
err = 0;
conf->fullsync = 1;
rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
}
print_conf(conf); print_conf(conf);
return err; return err;
} }
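
The scan is now two-phase: the first loop only records the first usable replacement slot in repl_slot, so any fully removed mirror slot anywhere in the array wins, and the device becomes a replacement only if no regular slot accepted it. A condensed restatement of the resulting control flow (elisions marked):

for (mirror = first; mirror <= last; mirror++) {
	p = conf->mirrors + mirror;
	if (!p->rdev) {
		/* ... install rdev as a regular member, err = 0 ... */
		break;
	}
	if (test_bit(WantReplacement, &p->rdev->flags) &&
	    p[conf->raid_disks].rdev == NULL && repl_slot < 0)
		repl_slot = mirror;	/* remember, do not commit yet */
}
if (err && repl_slot >= 0) {
	/* ... fall back: install rdev as replacement at repl_slot ... */
}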
...@@ -2299,7 +2307,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, ...@@ -2299,7 +2307,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
d++; d++;
if (d == conf->raid_disks * 2) if (d == conf->raid_disks * 2)
d = 0; d = 0;
} while (!success && d != read_disk); } while (d != read_disk);
if (!success) { if (!success) {
/* Cannot read from anywhere - mark it bad */ /* Cannot read from anywhere - mark it bad */
......
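
Dropping !success from the loop condition in fix_read_error() is safe because the body already breaks out as soon as a read succeeds; the shape of the loop, abridged with the rdev selection elided:

do {
	/* ... pick rdev for slot d and attempt the read ... */
	if (sync_page_io(rdev, sect, s << 9, conf->tmppage,
			 REQ_OP_READ, false))
		success = 1;
	if (success)
		break;		/* makes '!success' in the condition dead */
	d++;
	if (d == conf->raid_disks * 2)
		d = 0;
} while (d != read_disk);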
...@@ -157,7 +157,6 @@ struct r1bio { ...@@ -157,7 +157,6 @@ struct r1bio {
sector_t sector; sector_t sector;
int sectors; int sectors;
unsigned long state; unsigned long state;
unsigned long start_time;
struct mddev *mddev; struct mddev *mddev;
/* /*
* original bio going to /dev/mdx * original bio going to /dev/mdx
......
...@@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio) ...@@ -325,8 +325,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
bio->bi_status = BLK_STS_IOERR; bio->bi_status = BLK_STS_IOERR;
if (r10_bio->start_time)
bio_end_io_acct(bio, r10_bio->start_time);
bio_endio(bio); bio_endio(bio);
/* /*
* Wake up any possible resync thread that waits for the device * Wake up any possible resync thread that waits for the device
...@@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf, ...@@ -1172,7 +1170,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
} }
static void raid10_read_request(struct mddev *mddev, struct bio *bio, static void raid10_read_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio) struct r10bio *r10_bio, bool io_accounting)
{ {
struct r10conf *conf = mddev->private; struct r10conf *conf = mddev->private;
struct bio *read_bio; struct bio *read_bio;
...@@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, ...@@ -1243,9 +1241,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
} }
slot = r10_bio->read_slot; slot = r10_bio->read_slot;
if (!r10_bio->start_time && if (io_accounting) {
blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r10_bio->start_time = bio_start_io_acct(bio); r10_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].bio = read_bio;
...@@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, ...@@ -1322,6 +1321,25 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
} }
} }
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
struct md_rdev **prrdev)
{
struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(mirror->replacement);
/*
* Read replacement first to prevent reading both rdev and
* replacement as NULL during replacement replace rdev.
*/
smp_mb();
rdev = rcu_dereference(mirror->rdev);
if (rdev == rrdev)
rrdev = NULL;
*prrdev = rrdev;
return rdev;
}
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{ {
int i; int i;
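
The smp_mb() in dereference_rdev_and_rrdev() orders the two loads so that they pair with the writer that promotes a replacement to the primary slot. A sketch of that writer side as it exists in raid10_remove_disk() (recalled from the existing code, so treat the exact lines as an assumption):

/* raid10_remove_disk(), abridged */
p->rdev = p->replacement;		/* publish the new primary first */
clear_bit(Replacement, &p->replacement->flags);
smp_mb();	/* readers may see both pointers equal, never both NULL */
p->replacement = NULL;

With that pairing, a reader that loads ->replacement before ->rdev can observe both pointing at the same rdev (hence the rdev == rrdev check), but never NULL twice.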
...@@ -1332,11 +1350,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -1332,11 +1350,9 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
blocked_rdev = NULL; blocked_rdev = NULL;
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < conf->copies; i++) { for (i = 0; i < conf->copies; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev, *rrdev;
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[i].replacement); rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending);
blocked_rdev = rdev; blocked_rdev = rdev;
...@@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1465,15 +1481,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
int d = r10_bio->devs[i].devnum; int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev, *rrdev; struct md_rdev *rdev, *rrdev;
rrdev = rcu_dereference(conf->mirrors[d].replacement); rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
/*
* Read replacement first to prevent reading both rdev and
* replacement as NULL during replacement replace rdev.
*/
smp_mb();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && (test_bit(Faulty, &rdev->flags))) if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL; rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags))) if (rrdev && (test_bit(Faulty, &rrdev->flags)))
...@@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, ...@@ -1543,8 +1551,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio; r10_bio->master_bio = bio;
} }
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) md_account_bio(mddev, &bio);
r10_bio->start_time = bio_start_io_acct(bio); r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1); atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
...@@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) ...@@ -1571,12 +1579,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0; r10_bio->state = 0;
r10_bio->read_slot = -1; r10_bio->read_slot = -1;
r10_bio->start_time = 0;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
conf->geo.raid_disks); conf->geo.raid_disks);
if (bio_data_dir(bio) == READ) if (bio_data_dir(bio) == READ)
raid10_read_request(mddev, bio, r10_bio); raid10_read_request(mddev, bio, r10_bio, true);
else else
raid10_write_request(mddev, bio, r10_bio); raid10_write_request(mddev, bio, r10_bio);
} }
...@@ -1780,10 +1787,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) ...@@ -1780,10 +1787,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
*/ */
rcu_read_lock(); rcu_read_lock();
for (disk = 0; disk < geo->raid_disks; disk++) { for (disk = 0; disk < geo->raid_disks; disk++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); struct md_rdev *rdev, *rrdev;
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[disk].replacement);
rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
r10_bio->devs[disk].bio = NULL; r10_bio->devs[disk].bio = NULL;
r10_bio->devs[disk].repl_bio = NULL; r10_bio->devs[disk].repl_bio = NULL;
...@@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, ...@@ -2720,10 +2726,10 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio) static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
{ {
int sect = 0; /* Offset from r10_bio->sector */ int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors; int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
struct md_rdev *rdev; struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors); int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum; int d = r10_bio->devs[slot].devnum;
/* still own a reference to this rdev, so it cannot /* still own a reference to this rdev, so it cannot
* have been cleared recently. * have been cleared recently.
...@@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2744,13 +2750,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
pr_notice("md/raid10:%s: %pg: Failing raid device\n", pr_notice("md/raid10:%s: %pg: Failing raid device\n",
mdname(mddev), rdev->bdev); mdname(mddev), rdev->bdev);
md_error(mddev, rdev); md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED; r10_bio->devs[slot].bio = IO_BLOCKED;
return; return;
} }
while(sectors) { while(sectors) {
int s = sectors; int s = sectors;
int sl = r10_bio->read_slot; int sl = slot;
int success = 0; int success = 0;
int start; int start;
...@@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2785,7 +2791,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sl++; sl++;
if (sl == conf->copies) if (sl == conf->copies)
sl = 0; sl = 0;
} while (!success && sl != r10_bio->read_slot); } while (sl != slot);
rcu_read_unlock(); rcu_read_unlock();
if (!success) { if (!success) {
...@@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2793,16 +2799,16 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
* as bad on the first device to discourage future * as bad on the first device to discourage future
* reads. * reads.
*/ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum; int dn = r10_bio->devs[slot].devnum;
rdev = conf->mirrors[dn].rdev; rdev = conf->mirrors[dn].rdev;
if (!rdev_set_badblocks( if (!rdev_set_badblocks(
rdev, rdev,
r10_bio->devs[r10_bio->read_slot].addr r10_bio->devs[slot].addr
+ sect, + sect,
s, 0)) { s, 0)) {
md_error(mddev, rdev); md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio r10_bio->devs[slot].bio
= IO_BLOCKED; = IO_BLOCKED;
} }
break; break;
...@@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2811,7 +2817,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
start = sl; start = sl;
/* write it back and re-read */ /* write it back and re-read */
rcu_read_lock(); rcu_read_lock();
while (sl != r10_bio->read_slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
...@@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2845,7 +2851,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rcu_read_lock(); rcu_read_lock();
} }
sl = start; sl = start;
while (sl != r10_bio->read_slot) { while (sl != slot) {
if (sl==0) if (sl==0)
sl = conf->copies; sl = conf->copies;
sl--; sl--;
...@@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2985,7 +2991,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
r10_bio->state = 0; r10_bio->state = 0;
raid10_read_request(mddev, r10_bio->master_bio, r10_bio); raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
/* /*
* allow_barrier after re-submit to ensure no sync io * allow_barrier after re-submit to ensure no sync io
* can be issued while regular io pending. * can be issued while regular io pending.
......
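
The new io_accounting flag exists because handle_read_error() re-enters raid10_read_request() with a master bio that was already cloned and accounted on first submission; accounting it again would double-count the I/O. The two call sites from the hunks above:

/* first submission (__make_request): account and clone */
raid10_read_request(mddev, bio, r10_bio, true);

/* retry after a read error: already accounted, skip it */
raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);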
...@@ -123,7 +123,6 @@ struct r10bio { ...@@ -123,7 +123,6 @@ struct r10bio {
sector_t sector; /* virtual sector number */ sector_t sector; /* virtual sector number */
int sectors; int sectors;
unsigned long state; unsigned long state;
unsigned long start_time;
struct mddev *mddev; struct mddev *mddev;
/* /*
* original bio going to /dev/mdx * original bio going to /dev/mdx
......
...@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf, ...@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/ */
static void raid5_align_endio(struct bio *bi) static void raid5_align_endio(struct bio *bi)
{ {
struct md_io_acct *md_io_acct = bi->bi_private; struct bio *raid_bi = bi->bi_private;
struct bio *raid_bi = md_io_acct->orig_bio; struct md_rdev *rdev = (void *)raid_bi->bi_next;
struct mddev *mddev; struct mddev *mddev = rdev->mddev;
struct r5conf *conf; struct r5conf *conf = mddev->private;
struct md_rdev *rdev;
blk_status_t error = bi->bi_status; blk_status_t error = bi->bi_status;
unsigned long start_time = md_io_acct->start_time;
bio_put(bi); bio_put(bi);
rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL; raid_bi->bi_next = NULL;
mddev = rdev->mddev;
conf = mddev->private;
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
if (!error) { if (!error) {
if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
bio_end_io_acct(raid_bi, start_time);
bio_endio(raid_bi); bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads)) if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_quiescent);
...@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev; struct md_rdev *rdev;
sector_t sector, end_sector, first_bad; sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx; int bad_sectors, dd_idx;
struct md_io_acct *md_io_acct;
bool did_inc; bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) { if (!in_chunk_boundary(mddev, raid_bio)) {
...@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) ...@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
return 0; return 0;
} }
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, md_account_bio(mddev, &raid_bio);
&mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
raid_bio->bi_next = (void *)rdev; raid_bio->bi_next = (void *)rdev;
if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
md_io_acct->start_time = bio_start_io_acct(raid_bio);
md_io_acct->orig_bio = raid_bio;
align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
&mddev->bio_set);
align_bio->bi_end_io = raid5_align_endio; align_bio->bi_end_io = raid5_align_endio;
align_bio->bi_private = md_io_acct; align_bio->bi_private = raid_bio;
align_bio->bi_iter.bi_sector = sector; align_bio->bi_iter.bi_sector = sector;
/* No reshape active, so we can trust rdev->data_offset */ /* No reshape active, so we can trust rdev->data_offset */
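
With accounting folded into the md_account_bio() clone, raid5 no longer needs its private md_io_acct container: the endio only has to recover the parent bio from bi_private and the rdev stashed in the parent's otherwise-unused bi_next. The handoff, assembled from the two hunks above:

/* submit side (raid5_read_one_chunk) */
md_account_bio(mddev, &raid_bio);	/* raid_bio now points at the clone */
raid_bio->bi_next = (void *)rdev;	/* stash rdev for the endio */
align_bio->bi_private = raid_bio;
align_bio->bi_end_io = raid5_align_endio;

/* completion side (raid5_align_endio) */
struct bio *raid_bi = bi->bi_private;
struct md_rdev *rdev = (void *)raid_bi->bi_next;
raid_bi->bi_next = NULL;
rdev_dec_pending(rdev, conf->mddev);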
...@@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev) ...@@ -7787,19 +7774,12 @@ static int raid5_run(struct mddev *mddev)
struct md_rdev *rdev; struct md_rdev *rdev;
struct md_rdev *journal_dev = NULL; struct md_rdev *journal_dev = NULL;
sector_t reshape_offset = 0; sector_t reshape_offset = 0;
int i, ret = 0; int i;
long long min_offset_diff = 0; long long min_offset_diff = 0;
int first = 1; int first = 1;
if (acct_bioset_init(mddev)) { if (mddev_init_writes_pending(mddev) < 0)
pr_err("md/raid456:%s: alloc acct bioset failed.\n", mdname(mddev));
return -ENOMEM; return -ENOMEM;
}
if (mddev_init_writes_pending(mddev) < 0) {
ret = -ENOMEM;
goto exit_acct_set;
}
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
...@@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7830,8 +7810,7 @@ static int raid5_run(struct mddev *mddev)
(mddev->bitmap_info.offset || mddev->bitmap_info.file)) { (mddev->bitmap_info.offset || mddev->bitmap_info.file)) {
pr_notice("md/raid:%s: array cannot have both journal and bitmap\n", pr_notice("md/raid:%s: array cannot have both journal and bitmap\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
if (mddev->reshape_position != MaxSector) { if (mddev->reshape_position != MaxSector) {
...@@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev) ...@@ -7856,15 +7835,13 @@ static int raid5_run(struct mddev *mddev)
if (journal_dev) { if (journal_dev) {
pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
if (mddev->new_level != mddev->level) { if (mddev->new_level != mddev->level) {
pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", pr_warn("md/raid:%s: unsupported reshape required - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
old_disks = mddev->raid_disks - mddev->delta_disks; old_disks = mddev->raid_disks - mddev->delta_disks;
/* reshape_position must be on a new-stripe boundary, and one /* reshape_position must be on a new-stripe boundary, and one
...@@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7880,8 +7857,7 @@ static int raid5_run(struct mddev *mddev)
if (sector_div(here_new, chunk_sectors * new_data_disks)) { if (sector_div(here_new, chunk_sectors * new_data_disks)) {
pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n", pr_warn("md/raid:%s: reshape_position not on a stripe boundary\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
reshape_offset = here_new * chunk_sectors; reshape_offset = here_new * chunk_sectors;
/* here_new is the stripe we will write to */ /* here_new is the stripe we will write to */
...@@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7903,8 +7879,7 @@ static int raid5_run(struct mddev *mddev)
else if (mddev->ro == 0) { else if (mddev->ro == 0) {
pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
} else if (mddev->reshape_backwards } else if (mddev->reshape_backwards
? (here_new * chunk_sectors + min_offset_diff <= ? (here_new * chunk_sectors + min_offset_diff <=
...@@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7914,8 +7889,7 @@ static int raid5_run(struct mddev *mddev)
/* Reading from the same stripe as writing to - bad */ /* Reading from the same stripe as writing to - bad */
pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n",
mdname(mddev)); mdname(mddev));
ret = -EINVAL; return -EINVAL;
goto exit_acct_set;
} }
pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev)); pr_debug("md/raid:%s: reshape will continue\n", mdname(mddev));
/* OK, we should be able to continue; */ /* OK, we should be able to continue; */
...@@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev) ...@@ -7939,10 +7913,8 @@ static int raid5_run(struct mddev *mddev)
else else
conf = mddev->private; conf = mddev->private;
if (IS_ERR(conf)) { if (IS_ERR(conf))
ret = PTR_ERR(conf); return PTR_ERR(conf);
goto exit_acct_set;
}
if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
if (!journal_dev) { if (!journal_dev) {
...@@ -8140,10 +8112,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -8140,10 +8112,7 @@ static int raid5_run(struct mddev *mddev)
free_conf(conf); free_conf(conf);
mddev->private = NULL; mddev->private = NULL;
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev)); pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
ret = -EIO; return -EIO;
exit_acct_set:
acct_bioset_exit(mddev);
return ret;
} }
static void raid5_free(struct mddev *mddev, void *priv) static void raid5_free(struct mddev *mddev, void *priv)
...@@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv) ...@@ -8151,7 +8120,6 @@ static void raid5_free(struct mddev *mddev, void *priv)
struct r5conf *conf = priv; struct r5conf *conf = priv;
free_conf(conf); free_conf(conf);
acct_bioset_exit(mddev);
mddev->to_remove = &raid5_attrs_group; mddev->to_remove = &raid5_attrs_group;
} }
......