Commit 8d0304e6 authored by Linus Torvalds

Merge tag 'md/3.16' of git://neil.brown.name/md

Pull md updates from Neil Brown:
 "Assorted md fixes for 3.16

  Mostly performance improvements with a few corner-case bug fixes"

* tag 'md/3.16' of git://neil.brown.name/md:
  raid5: speedup sync_request processing
  md/raid5: deadlock between retry_aligned_read with barrier io
  raid5: add an option to avoid copy data from bio to stripe cache
  md/bitmap: remove confusing code from filemap_get_page.
  raid5: avoid release list until last reference of the stripe
  md: md_clear_badblocks should return an error code on failure.
  md/raid56: Don't perform reads to support writes until stripe is ready.
  md: refuse to change shape of array if it is active but read-only
parents dfb94547 053f5b65
...@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store, ...@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
/* /*
* return a pointer to the page in the filemap that contains the given bit * return a pointer to the page in the filemap that contains the given bit
* *
* this lookup is complicated by the fact that the bitmap sb might be exactly
* 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
* 0 or page 1
*/ */
static inline struct page *filemap_get_page(struct bitmap_storage *store, static inline struct page *filemap_get_page(struct bitmap_storage *store,
unsigned long chunk) unsigned long chunk)
{ {
if (file_page_index(store, chunk) >= store->file_pages) if (file_page_index(store, chunk) >= store->file_pages)
return NULL; return NULL;
return store->filemap[file_page_index(store, chunk) return store->filemap[file_page_index(store, chunk)];
- file_page_index(store, 0)];
} }
static int bitmap_storage_alloc(struct bitmap_storage *store, static int bitmap_storage_alloc(struct bitmap_storage *store,
......
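The subtraction removed above was dead code: file_page_index(store, 0) is always zero in this storage layout, because the bitmap superblock, when it is stored in the file at all, occupies far less than a page worth of bits. A sketch of the helper as it reads in bitmap.c of this era (quoted from memory, so treat it as an approximation):

static inline unsigned long file_page_index(struct bitmap_storage *store,
                                            unsigned long chunk)
{
        if (store->sb_page)
                chunk += sizeof(bitmap_super_t) << 3;
        return chunk >> PAGE_BIT_SHIFT;
}

With a 256-byte bitmap_super_t the offset is 2048 bits, while a 4K page holds 32768 bits, so chunk 0 always lands on page 0 and the filemap[] array needs no rebasing.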
...@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->level = LEVEL_NONE; mddev->level = LEVEL_NONE;
return rv; return rv;
} }
if (mddev->ro)
return -EROFS;
/* request to change the personality. Need to ensure: /* request to change the personality. Need to ensure:
* - array is not engaged in resync/recovery/reshape * - array is not engaged in resync/recovery/reshape
...@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
int err; int err;
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
return -EBUSY; return -EBUSY;
if (mddev->ro)
return -EROFS;
mddev->new_layout = n; mddev->new_layout = n;
err = mddev->pers->check_reshape(mddev); err = mddev->pers->check_reshape(mddev);
if (err) { if (err) {
...@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len) ...@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
int err; int err;
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
return -EBUSY; return -EBUSY;
if (mddev->ro)
return -EROFS;
mddev->new_chunk_sectors = n >> 9; mddev->new_chunk_sectors = n >> 9;
err = mddev->pers->check_reshape(mddev); err = mddev->pers->check_reshape(mddev);
if (err) { if (err) {
...@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) ...@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
*/ */
if (mddev->sync_thread) if (mddev->sync_thread)
return -EBUSY; return -EBUSY;
if (mddev->ro)
return -EROFS;
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
sector_t avail = rdev->sectors; sector_t avail = rdev->sectors;
...@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) ...@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
/* change the number of raid disks */ /* change the number of raid disks */
if (mddev->pers->check_reshape == NULL) if (mddev->pers->check_reshape == NULL)
return -EINVAL; return -EINVAL;
if (mddev->ro)
return -EROFS;
if (raid_disks <= 0 || if (raid_disks <= 0 ||
(mddev->max_disks && raid_disks >= mddev->max_disks)) (mddev->max_disks && raid_disks >= mddev->max_disks))
return -EINVAL; return -EINVAL;
...@@ -8333,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors) ...@@ -8333,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
if (a < s) { if (a < s) {
/* we need to split this range */ /* we need to split this range */
if (bb->count >= MD_MAX_BADBLOCKS) { if (bb->count >= MD_MAX_BADBLOCKS) {
rv = 0; rv = -ENOSPC;
goto out; goto out;
} }
memmove(p+lo+1, p+lo, (bb->count - lo) * 8); memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
......
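The md_clear_badblocks change deserves a note: clearing the middle of a recorded bad range must split that range in two, which consumes a slot in the fixed-size table, and when the table is full the function used to report a bare 0 rather than a real error code. A minimal user-space sketch of the same split-needs-a-slot problem (hypothetical types, not the kernel's packed badblocks layout):

#include <errno.h>
#include <string.h>

#define MAX_RANGES 512

struct range { unsigned long start, end; };     /* half-open [start, end) */

struct range_table {
        struct range r[MAX_RANGES];
        int count;
};

/* Clear [s, e) out of entry i, splitting when it falls strictly inside. */
static int clear_range(struct range_table *t, int i,
                       unsigned long s, unsigned long e)
{
        if (t->r[i].start < s && e < t->r[i].end) {
                if (t->count >= MAX_RANGES)
                        return -ENOSPC; /* no free slot for the split */
                memmove(&t->r[i + 1], &t->r[i],
                        (t->count - i) * sizeof(t->r[0]));
                t->r[i].end = s;        /* left remainder */
                t->r[i + 1].start = e;  /* right remainder */
                t->count++;
        } else if (t->r[i].start < s) {
                t->r[i].end = s;        /* trim the tail */
        } else if (e < t->r[i].end) {
                t->r[i].start = e;      /* trim the head */
        }
        return 0;
}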
...@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, ...@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(atomic_read(&conf->active_stripes)==0); BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) { if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state) && if (test_bit(STRIPE_DELAYED, &sh->state) &&
!test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list); list_add_tail(&sh->lru, &conf->delayed_list);
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && if (atomic_read(&conf->preread_active_stripes)
< IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list); list_add_tail(&sh->lru, &conf->bitmap_list);
else { else {
...@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh) ...@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
int hash; int hash;
bool wakeup; bool wakeup;
/* Avoid release_list until the last reference.
*/
if (atomic_add_unless(&sh->count, -1, 1))
return;
if (unlikely(!conf->mddev->thread) || if (unlikely(!conf->mddev->thread) ||
test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
goto slow_path; goto slow_path;
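The fast path added here leans on atomic_add_unless() semantics: atomic_add_unless(&sh->count, -1, 1) decrements the count and returns non-zero unless the count is exactly 1, so every release except the final one returns immediately without touching release_list or the hash locks. A stand-alone C11 sketch of the same primitive (names are illustrative, not the kernel's implementation):

#include <stdatomic.h>
#include <stdbool.h>

/* Add 'a' to *v and return true, unless *v == u, in which case do
 * nothing and return false. */
static bool add_unless(atomic_int *v, int a, int u)
{
        int c = atomic_load(v);

        while (c != u) {
                /* on failure, c is reloaded with the current value */
                if (atomic_compare_exchange_weak(v, &c, c + a))
                        return true;
        }
        return false;
}

void put_ref(atomic_int *count)
{
        if (add_unless(count, -1, 1))
                return;         /* not the last reference: nothing else to do */
        /* last reference: fall through to the slow release machinery */
}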
...@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh) ...@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
int num = sh->raid_conf->pool_size; int num = sh->raid_conf->pool_size;
for (i = 0; i < num ; i++) { for (i = 0; i < num ; i++) {
WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
p = sh->dev[i].page; p = sh->dev[i].page;
if (!p) if (!p)
continue; continue;
...@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh) ...@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
return 1; return 1;
} }
sh->dev[i].page = page; sh->dev[i].page = page;
sh->dev[i].orig_page = page;
} }
return 0; return 0;
} }
...@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_NOMERGE; bi->bi_rw |= REQ_NOMERGE;
if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
sh->dev[i].vec.bv_page = sh->dev[i].page;
bi->bi_vcnt = 1; bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0; bi->bi_io_vec[0].bv_offset = 0;
...@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
else else
rbi->bi_iter.bi_sector = (sh->sector rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->data_offset); + rrdev->data_offset);
if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
sh->dev[i].rvec.bv_page = sh->dev[i].page;
rbi->bi_vcnt = 1; rbi->bi_vcnt = 1;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_io_vec[0].bv_offset = 0;
...@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
} }
static struct dma_async_tx_descriptor * static struct dma_async_tx_descriptor *
async_copy_data(int frombio, struct bio *bio, struct page *page, async_copy_data(int frombio, struct bio *bio, struct page **page,
sector_t sector, struct dma_async_tx_descriptor *tx) sector_t sector, struct dma_async_tx_descriptor *tx,
struct stripe_head *sh)
{ {
struct bio_vec bvl; struct bio_vec bvl;
struct bvec_iter iter; struct bvec_iter iter;
...@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, ...@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
if (clen > 0) { if (clen > 0) {
b_offset += bvl.bv_offset; b_offset += bvl.bv_offset;
bio_page = bvl.bv_page; bio_page = bvl.bv_page;
if (frombio) if (frombio) {
tx = async_memcpy(page, bio_page, page_offset, if (sh->raid_conf->skip_copy &&
b_offset == 0 && page_offset == 0 &&
clen == STRIPE_SIZE)
*page = bio_page;
else
tx = async_memcpy(*page, bio_page, page_offset,
b_offset, clen, &submit); b_offset, clen, &submit);
else } else
tx = async_memcpy(bio_page, page, b_offset, tx = async_memcpy(bio_page, *page, b_offset,
page_offset, clen, &submit); page_offset, clen, &submit);
} }
/* chain the operations */ /* chain the operations */
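The skip_copy test above is deliberately narrow: the bio's page may stand in for the stripe-cache page only when the segment starts at offset zero on both sides and covers exactly STRIPE_SIZE bytes, i.e. a full-page overwrite. A toy sketch of the borrow-and-restore lifecycle (hypothetical names; in the patch the restore happens in the completion paths further down):

#include <string.h>

#define STRIPE_PAGE 4096        /* assuming STRIPE_SIZE == PAGE_SIZE here */

struct slot {
        char *page;             /* page I/O is issued from */
        char *orig_page;        /* the stripe cache's own page */
};

/* Drain one write segment into the cache slot. */
static void drain(struct slot *s, char *bio_page,
                  size_t off, size_t len, int skip_copy)
{
        if (skip_copy && off == 0 && len == STRIPE_PAGE)
                s->page = bio_page;     /* borrow the bio page, no memcpy */
        else
                memcpy(s->orig_page + off, bio_page, len);
}

/* Once the write to disk completes, hand the borrowed page back. */
static void write_done(struct slot *s)
{
        s->page = s->orig_page;
}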
...@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh) ...@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
spin_unlock_irq(&sh->stripe_lock); spin_unlock_irq(&sh->stripe_lock);
while (rbi && rbi->bi_iter.bi_sector < while (rbi && rbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) { dev->sector + STRIPE_SECTORS) {
tx = async_copy_data(0, rbi, dev->page, tx = async_copy_data(0, rbi, &dev->page,
dev->sector, tx); dev->sector, tx, sh);
rbi = r5_next_bio(rbi, dev->sector); rbi = r5_next_bio(rbi, dev->sector);
} }
} }
...@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ...@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
BUG_ON(dev->written); BUG_ON(dev->written);
wbi = dev->written = chosen; wbi = dev->written = chosen;
spin_unlock_irq(&sh->stripe_lock); spin_unlock_irq(&sh->stripe_lock);
WARN_ON(dev->page != dev->orig_page);
while (wbi && wbi->bi_iter.bi_sector < while (wbi && wbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) { dev->sector + STRIPE_SECTORS) {
...@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ...@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
set_bit(R5_SyncIO, &dev->flags); set_bit(R5_SyncIO, &dev->flags);
if (wbi->bi_rw & REQ_DISCARD) if (wbi->bi_rw & REQ_DISCARD)
set_bit(R5_Discard, &dev->flags); set_bit(R5_Discard, &dev->flags);
else else {
tx = async_copy_data(1, wbi, dev->page, tx = async_copy_data(1, wbi, &dev->page,
dev->sector, tx); dev->sector, tx, sh);
if (dev->page != dev->orig_page) {
set_bit(R5_SkipCopy, &dev->flags);
clear_bit(R5_UPTODATE, &dev->flags);
clear_bit(R5_OVERWRITE, &dev->flags);
}
}
wbi = r5_next_bio(wbi, dev->sector); wbi = r5_next_bio(wbi, dev->sector);
} }
} }
...@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref) ...@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (dev->written || i == pd_idx || i == qd_idx) { if (dev->written || i == pd_idx || i == qd_idx) {
if (!discard) if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
set_bit(R5_UPTODATE, &dev->flags); set_bit(R5_UPTODATE, &dev->flags);
if (fua) if (fua)
set_bit(R5_WantFUA, &dev->flags); set_bit(R5_WantFUA, &dev->flags);
...@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
osh = get_free_stripe(conf, hash); osh = get_free_stripe(conf, hash);
unlock_device_hash_lock(conf, hash); unlock_device_hash_lock(conf, hash);
atomic_set(&nsh->count, 1); atomic_set(&nsh->count, 1);
for(i=0; i<conf->pool_size; i++) for(i=0; i<conf->pool_size; i++) {
nsh->dev[i].page = osh->dev[i].page; nsh->dev[i].page = osh->dev[i].page;
nsh->dev[i].orig_page = osh->dev[i].page;
}
for( ; i<newsize; i++) for( ; i<newsize; i++)
nsh->dev[i].page = NULL; nsh->dev[i].page = NULL;
nsh->hash_lock_index = hash; nsh->hash_lock_index = hash;
...@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
if (nsh->dev[i].page == NULL) { if (nsh->dev[i].page == NULL) {
struct page *p = alloc_page(GFP_NOIO); struct page *p = alloc_page(GFP_NOIO);
nsh->dev[i].page = p; nsh->dev[i].page = p;
nsh->dev[i].orig_page = p;
if (!p) if (!p)
err = -ENOMEM; err = -ENOMEM;
} }
...@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error) ...@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
} }
static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
static void raid5_build_block(struct stripe_head *sh, int i, int previous) static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{ {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req); bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec; dev->req.bi_io_vec = &dev->vec;
dev->req.bi_vcnt++; dev->req.bi_max_vecs = 1;
dev->req.bi_max_vecs++;
dev->req.bi_private = sh; dev->req.bi_private = sh;
dev->vec.bv_page = dev->page;
bio_init(&dev->rreq); bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec; dev->rreq.bi_io_vec = &dev->rvec;
dev->rreq.bi_vcnt++; dev->rreq.bi_max_vecs = 1;
dev->rreq.bi_max_vecs++;
dev->rreq.bi_private = sh; dev->rreq.bi_private = sh;
dev->rvec.bv_page = dev->page;
dev->flags = 0; dev->flags = 0;
dev->sector = compute_blocknr(sh, i, previous); dev->sector = compute_blocknr(sh, i, previous);
...@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, ...@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
/* and fail all 'written' */ /* and fail all 'written' */
bi = sh->dev[i].written; bi = sh->dev[i].written;
sh->dev[i].written = NULL; sh->dev[i].written = NULL;
if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
sh->dev[i].page = sh->dev[i].orig_page;
}
if (bi) bitmap_end = 1; if (bi) bitmap_end = 1;
while (bi && bi->bi_iter.bi_sector < while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) { sh->dev[i].sector + STRIPE_SECTORS) {
...@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
(s->failed >= 1 && fdev[0]->toread) || (s->failed >= 1 && fdev[0]->toread) ||
(s->failed >= 2 && fdev[1]->toread) || (s->failed >= 2 && fdev[1]->toread) ||
(sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
!test_bit(R5_OVERWRITE, &fdev[0]->flags)) || !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
(sh->raid_conf->level == 6 && s->failed && s->to_write))) { (sh->raid_conf->level == 6 && s->failed && s->to_write &&
s->to_write < sh->raid_conf->raid_disks - 2 &&
(!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
/* we would like to get this block, possibly by computing it, /* we would like to get this block, possibly by computing it,
* otherwise read it if the backing disk is insync * otherwise read it if the backing disk is insync
*/ */
...@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf, ...@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
dev = &sh->dev[i]; dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) && if (!test_bit(R5_LOCKED, &dev->flags) &&
(test_bit(R5_UPTODATE, &dev->flags) || (test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Discard, &dev->flags))) { test_bit(R5_Discard, &dev->flags) ||
test_bit(R5_SkipCopy, &dev->flags))) {
/* We can return any write requests */ /* We can return any write requests */
struct bio *wbi, *wbi2; struct bio *wbi, *wbi2;
pr_debug("Return write for disc %d\n", i); pr_debug("Return write for disc %d\n", i);
if (test_and_clear_bit(R5_Discard, &dev->flags)) if (test_and_clear_bit(R5_Discard, &dev->flags))
clear_bit(R5_UPTODATE, &dev->flags); clear_bit(R5_UPTODATE, &dev->flags);
if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
dev->page = dev->orig_page;
}
wbi = dev->written; wbi = dev->written;
dev->written = NULL; dev->written = NULL;
while (wbi && wbi->bi_iter.bi_sector < while (wbi && wbi->bi_iter.bi_sector <
...@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf, ...@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
0); 0);
} else if (test_bit(R5_Discard, &dev->flags)) } else if (test_bit(R5_Discard, &dev->flags))
discard_pending = 1; discard_pending = 1;
WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
WARN_ON(dev->page != dev->orig_page);
} }
if (!discard_pending && if (!discard_pending &&
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
...@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf, ...@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
!(test_bit(R5_UPTODATE, &dev->flags) || !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) { test_bit(R5_Wantcompute, &dev->flags))) {
if (test_bit(R5_Insync, &dev->flags)) rcw++; if (test_bit(R5_Insync, &dev->flags))
rcw++;
else else
rcw += 2*disks; rcw += 2*disks;
} }
...@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, ...@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!(test_bit(R5_UPTODATE, &dev->flags) || !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags)) && test_bit(R5_Wantcompute, &dev->flags)) &&
test_bit(R5_Insync, &dev->flags)) { test_bit(R5_Insync, &dev->flags)) {
if ( if (test_bit(STRIPE_PREREAD_ACTIVE,
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { &sh->state)) {
pr_debug("Read_old block " pr_debug("Read_old block %d for r-m-w\n",
"%d for r-m-w\n", i); i);
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags); set_bit(R5_Wantread, &dev->flags);
s->locked++; s->locked++;
...@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf, ...@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
!(test_bit(R5_UPTODATE, &dev->flags) || !(test_bit(R5_UPTODATE, &dev->flags) ||
test_bit(R5_Wantcompute, &dev->flags))) { test_bit(R5_Wantcompute, &dev->flags))) {
rcw++; rcw++;
if (!test_bit(R5_Insync, &dev->flags)) if (test_bit(R5_Insync, &dev->flags) &&
continue; /* it's a failed drive */ test_bit(STRIPE_PREREAD_ACTIVE,
if ( &sh->state)) {
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
pr_debug("Read_old block " pr_debug("Read_old block "
"%d for Reconstruct\n", i); "%d for Reconstruct\n", i);
set_bit(R5_LOCKED, &dev->flags); set_bit(R5_LOCKED, &dev->flags);
...@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int ...@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
set_bit(STRIPE_SYNC_REQUESTED, &sh->state); set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
handle_stripe(sh);
release_stripe(sh); release_stripe(sh);
return STRIPE_SECTORS; return STRIPE_SECTORS;
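Setting STRIPE_HANDLE and letting release_stripe() queue the stripe means the resync thread no longer calls handle_stripe() synchronously; the stripe is instead picked up by raid5d or the worker threads, so several resync stripes can be in flight at once. That, as best I can tell, is the "speedup sync_request processing" entry from the shortlog.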
...@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) ...@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
/* already done this stripe */ /* already done this stripe */
continue; continue;
sh = get_active_stripe(conf, sector, 0, 1, 0); sh = get_active_stripe(conf, sector, 0, 1, 1);
if (!sh) { if (!sh) {
/* failed to get a stripe - must wait */ /* failed to get a stripe - must wait */
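The final argument flipped here is get_active_stripe()'s noquiesce flag. With it set, the aligned-read retry path can still obtain a stripe while the array is quiescing, which breaks the deadlock against barrier I/O named in the shortlog: quiesce waits for pending aligned reads to drain, while the retry path was waiting for quiesce to end.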
...@@ -5354,6 +5397,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, ...@@ -5354,6 +5397,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
raid5_show_preread_threshold, raid5_show_preread_threshold,
raid5_store_preread_threshold); raid5_store_preread_threshold);
static ssize_t
raid5_show_skip_copy(struct mddev *mddev, char *page)
{
struct r5conf *conf = mddev->private;
if (conf)
return sprintf(page, "%d\n", conf->skip_copy);
else
return 0;
}
static ssize_t
raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
{
struct r5conf *conf = mddev->private;
unsigned long new;
if (len >= PAGE_SIZE)
return -EINVAL;
if (!conf)
return -ENODEV;
if (kstrtoul(page, 10, &new))
return -EINVAL;
new = !!new;
if (new == conf->skip_copy)
return len;
mddev_suspend(mddev);
conf->skip_copy = new;
if (new)
mddev->queue->backing_dev_info.capabilities |=
BDI_CAP_STABLE_WRITES;
else
mddev->queue->backing_dev_info.capabilities &=
~BDI_CAP_STABLE_WRITES;
mddev_resume(mddev);
return len;
}
static struct md_sysfs_entry
raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
raid5_show_skip_copy,
raid5_store_skip_copy);
static ssize_t static ssize_t
stripe_cache_active_show(struct mddev *mddev, char *page) stripe_cache_active_show(struct mddev *mddev, char *page)
{ {
...@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = { ...@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
&raid5_stripecache_active.attr, &raid5_stripecache_active.attr,
&raid5_preread_bypass_threshold.attr, &raid5_preread_bypass_threshold.attr,
&raid5_group_thread_cnt.attr, &raid5_group_thread_cnt.attr,
&raid5_skip_copy.attr,
NULL, NULL,
}; };
static struct attribute_group raid5_attrs_group = { static struct attribute_group raid5_attrs_group = {
......
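Once this lands, the knob shows up alongside the other raid5 attributes, typically as /sys/block/mdX/md/skip_copy. Writing 1 trades the bio-to-cache memcpy for the page-borrowing scheme sketched earlier; because in-flight pages are then referenced directly until write-out completes, the store path flags the queue with BDI_CAP_STABLE_WRITES so upper layers keep those pages stable, and the switch itself happens under mddev_suspend()/mddev_resume() so no I/O is in flight while the mode changes.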
...@@ -232,7 +232,7 @@ struct stripe_head { ...@@ -232,7 +232,7 @@ struct stripe_head {
*/ */
struct bio req, rreq; struct bio req, rreq;
struct bio_vec vec, rvec; struct bio_vec vec, rvec;
struct page *page; struct page *page, *orig_page;
struct bio *toread, *read, *towrite, *written; struct bio *toread, *read, *towrite, *written;
sector_t sector; /* sector of this page */ sector_t sector; /* sector of this page */
unsigned long flags; unsigned long flags;
...@@ -299,6 +299,7 @@ enum r5dev_flags { ...@@ -299,6 +299,7 @@ enum r5dev_flags {
* data in, and now is a good time to write it out. * data in, and now is a good time to write it out.
*/ */
R5_Discard, /* Discard the stripe */ R5_Discard, /* Discard the stripe */
R5_SkipCopy, /* Don't copy data from bio to stripe cache */
}; };
/* /*
...@@ -436,6 +437,7 @@ struct r5conf { ...@@ -436,6 +437,7 @@ struct r5conf {
atomic_t pending_full_writes; /* full write backlog */ atomic_t pending_full_writes; /* full write backlog */
int bypass_count; /* bypassed prereads */ int bypass_count; /* bypassed prereads */
int bypass_threshold; /* preread nice */ int bypass_threshold; /* preread nice */
int skip_copy; /* Don't copy data from bio to stripe cache */
struct list_head *last_hold; /* detect hold_list promotions */ struct list_head *last_hold; /* detect hold_list promotions */
atomic_t reshape_stripes; /* stripes with pending writes for reshape */ atomic_t reshape_stripes; /* stripes with pending writes for reshape */
......